Load packages and set working directory

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(scater)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## Loading required package: BiocParallel
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply
## 
## Attaching package: 'scater'
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     arrange, filter, mutate, rename
## The following object is masked from 'package:stats':
## 
##     filter
library(dplyr)
library(limma)
## 
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
## 
##     plotMDS
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(SingleCellExperiment)
library(Seurat)
## Loading required package: cowplot
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## The following object is masked from 'package:tidyr':
## 
##     expand
library(mclust)
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
## 
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
## 
##     map
library(Matrix)
library(tibble)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following object is masked from 'package:S4Vectors':
## 
##     first
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(edgeR)
## 
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
## 
##     cpm
## Set the working directory so that the relative paths used below
## ("./seurat_campbell_chow_just_created.rds", "mito_genes_table.csv") resolve.
## The location can be overridden with the SCRNASEQ_PROJECT_DIR environment
## variable so the analysis is not tied to one machine; when the variable is
## unset the original project path is used.
project_dir <- Sys.getenv(
  "SCRNASEQ_PROJECT_DIR",
  unset = "/media/data/mattb/projects/Brian_scRNAseq_website/R_projects/mouse_hypothalamus_scRNAseq"
)
setwd(project_dir)

Analysis of Campbell scRNAseq data for Chow fed mice (5 replicates) using seurat

Load CHOW mouse data

## Read the saved Seurat object for the chow data set from disk
chow_rds_path <- "./seurat_campbell_chow_just_created.rds"
seurat_campbell_chow <- readRDS(chow_rds_path)

## Show the object summary
seurat_campbell_chow
## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 11255 samples.

Quality control on CHOW mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the ID column as character: before R 4.0
## read.csv() returned factors by default, and indexing a matrix or data frame
## with a factor selects rows by the factor's integer codes, not by name.
mito_genes_table <- read.csv("mito_genes_table.csv", header = TRUE,
                             stringsAsFactors = FALSE)

## Vector of mitochondrial gene IDs (coerced defensively to character)
mito_genes <- as.character(mito_genes_table$id_with_url)

## Keep only the mitochondrial gene IDs that occur as row names in my dataset.
## Matching on row names avoids indexing with IDs that are absent.
mito_genes_present <- unique(
  mito_genes[mito_genes %in% rownames(seurat_campbell_chow@raw.data)]
)

## Fail early rather than silently computing a mitochondrial fraction from
## zero genes (e.g. if the ID column does not hold plain gene IDs).
if (length(mito_genes_present) == 0) {
  stop("No IDs from mito_genes_table.csv match the row names of raw.data",
       call. = FALSE)
}

## Inspect raw counts of the matched genes in the first five cells
seurat_campbell_chow@raw.data[mito_genes_present, 1:5]
##                    SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000028                       1                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000142                       0                       0
## ENSMUSG00000000183                       0                       0
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000085                       0                       0
## ENSMUSG00000000171                       0                       0
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000157                       0                       0
## ENSMUSG00000000094                       0                       0
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000103                       0                       0
## ENSMUSG00000000182                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000154                       0                       0
## ENSMUSG00000000159                       0                       3
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000078                       0                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000088                       2                       0
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000028                       0                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000142                       0                       0
## ENSMUSG00000000183                       0                       0
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000085                       1                       0
## ENSMUSG00000000171                       0                       3
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000157                       0                       0
## ENSMUSG00000000094                       0                       0
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000103                       0                       0
## ENSMUSG00000000182                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000154                       0                       0
## ENSMUSG00000000159                       0                      11
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000078                       3                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000088                       0                       2
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164436_AAAACACTTCAT
## ENSMUSG00000000028                       0
## ENSMUSG00000000037                       0
## ENSMUSG00000000148                       0
## ENSMUSG00000000142                       0
## ENSMUSG00000000183                       0
## ENSMUSG00000000149                       0
## ENSMUSG00000000131                       2
## ENSMUSG00000000125                       0
## ENSMUSG00000000085                       1
## ENSMUSG00000000171                       1
## ENSMUSG00000000093                       0
## ENSMUSG00000000157                       0
## ENSMUSG00000000094                       0
## ENSMUSG00000000127                       0
## ENSMUSG00000000103                       0
## ENSMUSG00000000182                       0
## ENSMUSG00000000058                       0
## ENSMUSG00000000154                       0
## ENSMUSG00000000159                      14
## ENSMUSG00000000056                       0
## ENSMUSG00000000049                       0
## ENSMUSG00000000168                       3
## ENSMUSG00000000134                       0
## ENSMUSG00000000078                       0
## ENSMUSG00000000001                       0
## ENSMUSG00000000194                       0
## ENSMUSG00000000126                       0
## ENSMUSG00000000167                       0
## ENSMUSG00000000184                       0
## ENSMUSG00000000088                       2
## ENSMUSG00000000120                       0
## ENSMUSG00000000031                       0
## Dimensions (rows, columns) of the raw-count sub-matrix selected by mito_genes_present
dim(seurat_campbell_chow@raw.data[mito_genes_present, ])
## [1]    32 11255
## Per-cell share of raw counts coming from the selected mitochondrial genes.
## Despite the name, this is a proportion (it is not multiplied by 100).
mito_counts_per_cell <- Matrix::colSums(
  seurat_campbell_chow@raw.data[mito_genes_present, ]
)
total_counts_per_cell <- Matrix::colSums(seurat_campbell_chow@raw.data)
percent_mito <- mito_counts_per_cell / total_counts_per_cell


## Summary statistics of the per-cell mitochondrial proportion
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0006089 0.0011086 0.0013382 0.0018298 0.0106698
## Store the per-cell mitochondrial proportion in the object's metadata
seurat_campbell_chow <- AddMetaData(
  object = seurat_campbell_chow,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## Preview the first rows of the Seurat object's cell metadata
head(seurat_campbell_chow@meta.data)
##                         nGene  nUMI orig.ident replicate_name percent_mito
## SRR5164436_AAAAAATGCATG  1158  2043 SRR5164436     SRR5164436 0.0014684288
## SRR5164436_AAAAACACGACG  2171  4531 SRR5164436     SRR5164436 0.0006623979
## SRR5164436_AAAAAGAAAAAT  1286  2154 SRR5164436     SRR5164436 0.0018578727
## SRR5164436_AAAAATGCACTA  3464  8755 SRR5164436     SRR5164436 0.0018289895
## SRR5164436_AAAACACTTCAT  3626 10143 SRR5164436     SRR5164436 0.0022691397
## SRR5164436_AAAACGAACATG  3775 10500 SRR5164436     SRR5164436 0.0012388031
## QC violin plots of genes detected, UMIs and mitochondrial proportion
qc_features <- c("nGene", "nUMI", "percent_mito")
VlnPlot(
  object = seurat_campbell_chow,
  features.plot = qc_features,
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)

## QC scatter plots of nUMI against the mitochondrial proportion and against
## the number of genes; a 1 x 2 plotting layout is requested first.
par(mfrow = c(1, 2))
# Variants with do.hover = TRUE, kept for reference:
# GenePlot(object = seurat_campbell_chow, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_chow, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_chow,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)

GenePlot(
  object = seurat_campbell_chow,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)

Filter cells after QC

Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 6000 genes expressed.

## Manual count of cells below both upper QC limits (percent_mito < 0.004 and
## nGene < 6000); I already know all cells have >800 genes.
with(
  seurat_campbell_chow@meta.data,
  table(percent_mito < 0.004 & nGene < 6000)
)
## 
## FALSE  TRUE 
##   358 10897
# FALSE  TRUE 
#  358 10897 
 
## Keep cells inside the QC limits: nGene between 800 and 6000, and
## percent_mito below 0.004 (0.4%) with no lower limit.
seurat_campbell_chow <- FilterCells(
  object = seurat_campbell_chow,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(6000, 0.004)
)

seurat_campbell_chow
## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 10897 samples.
# An object of class seurat in project CAMPBELL_CHOW 
# 30000 genes across 10897 samples.
# 358 cells are filtered out; numbers consistent with above

Log normalise gene expression per cell

## Histogram of the per-cell expression totals before normalisation
total_expr_before <- colSums(seurat_campbell_chow@data)
hist(
  total_expr_before,
  breaks = 100,
  main = "Total expression before normalisation",
  xlab = "Sum of expression"
)

## Log-normalise gene expression per cell with a scale factor of 10,000
seurat_campbell_chow <- NormalizeData(
  object = seurat_campbell_chow,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)

## Histogram of the per-cell expression totals after normalisation
total_expr_after <- colSums(seurat_campbell_chow@data)
hist(
  total_expr_after,
  breaks = 100,
  main = "Total expression after normalisation",
  xlab = "Sum of expression"
)

Find genes whose expression varies between cells.

Identify genes whose expression varies between cells; these variable genes are used to construct the principal components on which the cells are clustered.

## Select variable genes using mean-expression and dispersion cutoffs
seurat_campbell_chow <- FindVariableGenes(
  object = seurat_campbell_chow,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05,
  x.high.cutoff = 4,
  y.cutoff = 0.75,
  num.bin = 20,
  binning.method = "equal_width"
)

# How many variable genes were selected
length(seurat_campbell_chow@var.genes)
## [1] 1817

seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 1817 variable genes

seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 2346 variable genes

seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width")

                      == 4084 variable genes

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI, percent_mito and mouse replicate.

## Scale the data while regressing out nUMI, percent_mito and replicate_name
vars_to_regress <- c("nUMI", "percent_mito", "replicate_name")
seurat_campbell_chow <- ScaleData(
  object = seurat_campbell_chow,
  vars.to.regress = vars_to_regress
)
## Regressing out: nUMI, percent_mito, replicate_name
## 
## Time Elapsed:  1.362351167202 mins
## Scaling data matrix

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA on the variable genes, printing 5 genes for each of PCs 1 to 5
seurat_campbell_chow <- RunPCA(
  object = seurat_campbell_chow,
  pc.genes = seurat_campbell_chow@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000027350"
## [4] "ENSMUSG00000044349" "ENSMUSG00000024261"
## [1] ""
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000018593" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000067786" "ENSMUSG00000055254" "ENSMUSG00000031760"
## [4] "ENSMUSG00000026701" "ENSMUSG00000021379"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000036887" "ENSMUSG00000060802"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800"
## [1] ""
## [1] ""
## Visualise the top genes associated with principal components 1 to 9
VizPCA(
  object = seurat_campbell_chow,
  pcs.use = 1:9
)

## Plot principal component 1 versus 2
PCAPlot(
  object = seurat_campbell_chow,
  dim.1 = 1,
  dim.2 = 2
)

## Plot principal component 2 versus 3
PCAPlot(
  object = seurat_campbell_chow,
  dim.1 = 2,
  dim.2 = 3
)

## Heat map of gene expression for principal component 1 genes
PCHeatmap(
  object = seurat_campbell_chow,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 2 genes
PCHeatmap(
  object = seurat_campbell_chow,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 3 genes
PCHeatmap(
  object = seurat_campbell_chow,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 4 genes
PCHeatmap(
  object = seurat_campbell_chow,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat maps of gene expression for the genes of principal components 5 to 18
PCHeatmap(
  object = seurat_campbell_chow,
  pc.use = 5:18,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)

## Take the pre-computed PCA (from the most variable genes identified earlier)
## and project it onto the entire dataset (all genes)
seurat_campbell_chow <- ProjectPCA(
  object = seurat_campbell_chow,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000033061" "ENSMUSG00000047261" "ENSMUSG00000050711"
##  [4] "ENSMUSG00000027581" "ENSMUSG00000055430" "ENSMUSG00000019986"
##  [7] "ENSMUSG00000026576" "ENSMUSG00000021087" "ENSMUSG00000024268"
## [10] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000040785"
## [13] "ENSMUSG00000042750" "ENSMUSG00000027500" "ENSMUSG00000029223"
## [16] "ENSMUSG00000022577" "ENSMUSG00000036699" "ENSMUSG00000027350"
## [19] "ENSMUSG00000059361" "ENSMUSG00000019923" "ENSMUSG00000044349"
## [22] "ENSMUSG00000035964" "ENSMUSG00000027273" "ENSMUSG00000018965"
## [25] "ENSMUSG00000039278" "ENSMUSG00000071658" "ENSMUSG00000024261"
## [28] "ENSMUSG00000024423" "ENSMUSG00000000159" "ENSMUSG00000031840"
## [1] ""
##  [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
##  [4] "ENSMUSG00000018593" "ENSMUSG00000026701" "ENSMUSG00000000567"
##  [7] "ENSMUSG00000050953" "ENSMUSG00000001025" "ENSMUSG00000031762"
## [10] "ENSMUSG00000035805" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [13] "ENSMUSG00000095538" "ENSMUSG00000022528" "ENSMUSG00000058135"
## [16] "ENSMUSG00000005360" "ENSMUSG00000018102" "ENSMUSG00000027712"
## [19] "ENSMUSG00000029838" "ENSMUSG00000021250" "ENSMUSG00000030342"
## [22] "ENSMUSG00000032231" "ENSMUSG00000034467" "ENSMUSG00000053931"
## [25] "ENSMUSG00000008540" "ENSMUSG00000036570" "ENSMUSG00000026649"
## [28] "ENSMUSG00000017009" "ENSMUSG00000063564" "ENSMUSG00000059970"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000018451" "ENSMUSG00000021270" "ENSMUSG00000067786"
##  [4] "ENSMUSG00000055254" "ENSMUSG00000026223" "ENSMUSG00000031428"
##  [7] "ENSMUSG00000037926" "ENSMUSG00000052727" "ENSMUSG00000031760"
## [10] "ENSMUSG00000101111" "ENSMUSG00000037852" "ENSMUSG00000039278"
## [13] "ENSMUSG00000026701" "ENSMUSG00000031633" "ENSMUSG00000021379"
## [16] "ENSMUSG00000035805" "ENSMUSG00000034467" "ENSMUSG00000017390"
## [19] "ENSMUSG00000055430" "ENSMUSG00000025666" "ENSMUSG00000019986"
## [22] "ENSMUSG00000026649" "ENSMUSG00000095538" "ENSMUSG00000040785"
## [25] "ENSMUSG00000046432" "ENSMUSG00000079037" "ENSMUSG00000015222"
## [28] "ENSMUSG00000042750" "ENSMUSG00000000567" "ENSMUSG00000049154"
## [1] ""
##  [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000036896"
##  [4] "ENSMUSG00000036887" "ENSMUSG00000060802" "ENSMUSG00000079018"
##  [7] "ENSMUSG00000030579" "ENSMUSG00000075602" "ENSMUSG00000028581"
## [10] "ENSMUSG00000029484" "ENSMUSG00000058715" "ENSMUSG00000026365"
## [13] "ENSMUSG00000024621" "ENSMUSG00000024397" "ENSMUSG00000036256"
## [16] "ENSMUSG00000029622" "ENSMUSG00000064373" "ENSMUSG00000021423"
## [19] "ENSMUSG00000022584" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [22] "ENSMUSG00000056492" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [25] "ENSMUSG00000040584" "ENSMUSG00000020154" "ENSMUSG00000016494"
## [28] "ENSMUSG00000046805" "ENSMUSG00000036353" "ENSMUSG00000041378"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
##  [4] "ENSMUSG00000076439" "ENSMUSG00000036634" "ENSMUSG00000026830"
##  [7] "ENSMUSG00000031775" "ENSMUSG00000006782" "ENSMUSG00000046160"
## [10] "ENSMUSG00000050121" "ENSMUSG00000032060" "ENSMUSG00000033579"
## [13] "ENSMUSG00000032517" "ENSMUSG00000022425" "ENSMUSG00000027375"
## [16] "ENSMUSG00000041607" "ENSMUSG00000020486" "ENSMUSG00000027562"
## [19] "ENSMUSG00000027858" "ENSMUSG00000040759" "ENSMUSG00000013523"
## [22] "ENSMUSG00000073680" "ENSMUSG00000022090" "ENSMUSG00000026888"
## [25] "ENSMUSG00000037166" "ENSMUSG00000027199" "ENSMUSG00000043448"
## [28] "ENSMUSG00000090996" "ENSMUSG00000011884" "ENSMUSG00000028412"
## [1] ""
##  [1] "ENSMUSG00000027447" "ENSMUSG00000018451" "ENSMUSG00000036896"
##  [4] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000030579"
##  [7] "ENSMUSG00000058715" "ENSMUSG00000028581" "ENSMUSG00000024621"
## [10] "ENSMUSG00000022587" "ENSMUSG00000026576" "ENSMUSG00000024397"
## [13] "ENSMUSG00000023992" "ENSMUSG00000021423" "ENSMUSG00000015852"
## [16] "ENSMUSG00000038642" "ENSMUSG00000021268" "ENSMUSG00000008682"
## [19] "ENSMUSG00000046805" "ENSMUSG00000036353" "ENSMUSG00000050711"
## [22] "ENSMUSG00000033061" "ENSMUSG00000048163" "ENSMUSG00000060802"
## [25] "ENSMUSG00000040747" "ENSMUSG00000059498" "ENSMUSG00000030786"
## [28] "ENSMUSG00000050621" "ENSMUSG00000021665" "ENSMUSG00000036438"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
##  [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000024621"
##  [7] "ENSMUSG00000021423" "ENSMUSG00000028581" "ENSMUSG00000024397"
## [10] "ENSMUSG00000038642" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000046805" "ENSMUSG00000048163"
## [16] "ENSMUSG00000030786" "ENSMUSG00000059498" "ENSMUSG00000040747"
## [19] "ENSMUSG00000000682" "ENSMUSG00000021665" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040229" "ENSMUSG00000036908" "ENSMUSG00000018008"
## [25] "ENSMUSG00000030844" "ENSMUSG00000069516" "ENSMUSG00000044811"
## [28] "ENSMUSG00000020377" "ENSMUSG00000089929" "ENSMUSG00000052336"
## [1] ""
##  [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
##  [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000040584"
##  [7] "ENSMUSG00000020154" "ENSMUSG00000041378" "ENSMUSG00000030237"
## [10] "ENSMUSG00000030235" "ENSMUSG00000061353" "ENSMUSG00000029648"
## [13] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000033960"
## [16] "ENSMUSG00000031239" "ENSMUSG00000039349" "ENSMUSG00000024140"
## [19] "ENSMUSG00000006386" "ENSMUSG00000020717" "ENSMUSG00000114487"
## [22] "ENSMUSG00000042116" "ENSMUSG00000031871" "ENSMUSG00000027435"
## [25] "ENSMUSG00000019966" "ENSMUSG00000034738" "ENSMUSG00000062960"
## [28] "ENSMUSG00000040732" "ENSMUSG00000039831" "ENSMUSG00000045954"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
##  [4] "ENSMUSG00000007682" "ENSMUSG00000034810" "ENSMUSG00000029838"
##  [7] "ENSMUSG00000022528" "ENSMUSG00000055653" "ENSMUSG00000063564"
## [10] "ENSMUSG00000045005" "ENSMUSG00000030629" "ENSMUSG00000052387"
## [13] "ENSMUSG00000022816" "ENSMUSG00000026185" "ENSMUSG00000047786"
## [16] "ENSMUSG00000021250" "ENSMUSG00000017493" "ENSMUSG00000017390"
## [19] "ENSMUSG00000024518" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [22] "ENSMUSG00000034640" "ENSMUSG00000008540" "ENSMUSG00000030111"
## [25] "ENSMUSG00000064370" "ENSMUSG00000093460" "ENSMUSG00000038418"
## [28] "ENSMUSG00000061718" "ENSMUSG00000027239" "ENSMUSG00000064215"
## [1] ""
##  [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
##  [4] "ENSMUSG00000027800" "ENSMUSG00000094800" "ENSMUSG00000044772"
##  [7] "ENSMUSG00000027744" "ENSMUSG00000072674" "ENSMUSG00000032595"
## [10] "ENSMUSG00000110332" "ENSMUSG00000045655" "ENSMUSG00000041323"
## [13] "ENSMUSG00000047139" "ENSMUSG00000095304" "ENSMUSG00000108841"
## [16] "ENSMUSG00000022037" "ENSMUSG00000047394" "ENSMUSG00000029182"
## [19] "ENSMUSG00000072473" "ENSMUSG00000046242" "ENSMUSG00000047671"
## [22] "ENSMUSG00000044475" "ENSMUSG00000020473" "ENSMUSG00000027360"
## [25] "ENSMUSG00000026683" "ENSMUSG00000028441" "ENSMUSG00000038135"
## [28] "ENSMUSG00000026301" "ENSMUSG00000091345" "ENSMUSG00000027867"
## [1] ""
## [1] ""

Determine statistically significant principal components

## Run the JackStraw test (100 replicates, progress shown) to investigate
## which principal components are statistically significant.
seurat_campbell_chow <- JackStraw(
  object = seurat_campbell_chow,
  num.replicate = 100,
  display.progress = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  7.0821281393369 mins
# Maximum number of PCs allowed = 20.


## Draw the JackStraw plots for principal components 1 to 20.
JackStrawPlot(
  object = seurat_campbell_chow,
  PCs = 1:20
)
## Warning: Removed 25440 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 10897 samples.
## An elbow plot is a less computationally intensive heuristic for choosing
## the statistically significant principal components.
PCElbowPlot(seurat_campbell_chow)

There is little difference in the JackStraw and elbow plots when using 1817, 2346 or 4084 variable genes.

Cell clustering

## Cluster cells on principal components 1-20 at resolution 0.6.
## save.SNN keeps the SNN graph on the object; force.recalc re-runs the
## clustering even if this resolution has been computed before.
seurat_campbell_chow <- FindClusters(
  object = seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Request the stored parameters for this resolution explicitly instead of
## relying on the implicit "latest run" lookup.
PrintFindClustersParams(object = seurat_campbell_chow, resolution = 0.6)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from principal components 1-20.
seurat_campbell_chow <- RunTSNE(
  object = seurat_campbell_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## Labelled t-SNE, legend hidden.
## (Unlabelled alternative: TSNEPlot(object = seurat_campbell_chow))
TSNEPlot(
  object = seurat_campbell_chow,
  do.label = TRUE,
  no.legend = TRUE
)

## Clusters found for each variable-gene set:
##   4084 variable genes = 19 clusters
##   2346 variable genes = 20 clusters
##   1817 variable genes = 20 clusters

## Cells per cluster.
table(seurat_campbell_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 2262 1772 1405 1338  653  536  527  343  324  304  250  209  207  188  165 
##   15   16   17   18   19 
##  136  110   66   58   44
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to five decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_chow@ident,
      seurat_campbell_chow@meta.data$replicate_name
    ),
    2
  ),
  5
)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.10912    0.37939    0.07658    0.42257    0.31911
##   1     0.16581    0.12646    0.23146    0.11024    0.08535
##   2     0.11880    0.14857    0.12349    0.17585    0.10281
##   3     0.17037    0.04129    0.18938    0.03150    0.05044
##   4     0.07123    0.03609    0.07347    0.03675    0.06305
##   5     0.04501    0.05299    0.03174    0.01837    0.11251
##   6     0.04416    0.04681    0.05864    0.03150    0.04462
##   7     0.04074    0.02341    0.03794    0.00787    0.01455
##   8     0.02764    0.03023    0.02898    0.03937    0.03395
##   9     0.04359    0.01528    0.03277    0.01050    0.00485
##   10    0.02707    0.02438    0.02346    0.00787    0.00873
##   11    0.03020    0.00748    0.02518    0.00262    0.00582
##   12    0.02194    0.01983    0.01035    0.02625    0.02813
##   13    0.02165    0.01073    0.01587    0.01575    0.02619
##   14    0.01880    0.01040    0.01069    0.02100    0.02716
##   15    0.01396    0.01300    0.01414    0.01575    0.00000
##   16    0.00769    0.00293    0.00000    0.02362    0.06305
##   17    0.00969    0.00455    0.00414    0.00262    0.00485
##   18    0.00627    0.00488    0.00586    0.00000    0.00388
##   19    0.00627    0.00130    0.00586    0.00000    0.00097
## t-SNE with cells coloured by the replicate_name metadata column.
TSNEPlot(
  object = seurat_campbell_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster-by-replicate proportion table to a data frame.
## unclass() leaves a plain matrix that still carries the table's dimnames,
## so the replicate names become the column names without hard-coding the
## number of replicates or rebuilding the table.
proportion_table <- as.data.frame(unclass(proportion_table))

## Correlate the replicates across all clusters. The previous
## proportion_table[1:5, ] subset rows, so only the first five clusters
## contributed to the correlation.
chart.Correlation(proportion_table)

####### Resolution 1.0
## Re-cluster on principal components 1-20 at resolution 1.0.
seurat_campbell_chow <- FindClusters(
  object = seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Request the parameters for this resolution explicitly: the recorded output
## of the argument-less call reported "Resolution: 0.6" after this run.
PrintFindClustersParams(object = seurat_campbell_chow, resolution = 1.0)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from principal components 1-20.
seurat_campbell_chow <- RunTSNE(
  object = seurat_campbell_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## Labelled t-SNE, legend hidden.
## (Unlabelled alternative: TSNEPlot(object = seurat_campbell_chow))
TSNEPlot(
  object = seurat_campbell_chow,
  do.label = TRUE,
  no.legend = TRUE
)

## Clusters found for each variable-gene set:
##   4084 variable genes = 21 clusters
##   2346 variable genes = 22 clusters

## Cells per cluster.
table(seurat_campbell_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1851 1685 1406 1255  660  631  542  378  343  326  304  285  250  209  188 
##   15   16   17   18   19   20 
##  165  136  110   70   58   45
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to five decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_chow@ident,
      seurat_campbell_chow@meta.data$replicate_name
    ),
    2
  ),
  5
)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.08832    0.31209    0.05795    0.32808    0.27934
##   1     0.15954    0.11834    0.22146    0.09449    0.08050
##   2     0.11880    0.14857    0.12384    0.17585    0.10281
##   3     0.15698    0.04454    0.17040    0.03937    0.05626
##   4     0.07322    0.03576    0.07382    0.03675    0.06305
##   5     0.04872    0.06860    0.06175    0.03937    0.05335
##   6     0.04501    0.05299    0.03380    0.02100    0.11154
##   7     0.02991    0.04194    0.03036    0.07612    0.02619
##   8     0.04074    0.02341    0.03794    0.00787    0.01455
##   9     0.02735    0.03056    0.02967    0.03937    0.03395
##   10    0.04359    0.01528    0.03277    0.01050    0.00485
##   11    0.02621    0.02828    0.01932    0.04199    0.03298
##   12    0.02707    0.02438    0.02346    0.00787    0.00873
##   13    0.03020    0.00748    0.02518    0.00262    0.00582
##   14    0.02165    0.01073    0.01587    0.01575    0.02619
##   15    0.01880    0.01040    0.01069    0.02100    0.02716
##   16    0.01396    0.01300    0.01414    0.01575    0.00000
##   17    0.00769    0.00293    0.00000    0.02362    0.06305
##   18    0.00969    0.00455    0.00552    0.00262    0.00485
##   19    0.00627    0.00488    0.00586    0.00000    0.00388
##   20    0.00627    0.00130    0.00621    0.00000    0.00097
## t-SNE with cells coloured by the replicate_name metadata column.
TSNEPlot(
  object = seurat_campbell_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster-by-replicate proportion table to a data frame.
## unclass() leaves a plain matrix that still carries the table's dimnames,
## so the replicate names become the column names without hard-coding the
## number of replicates or rebuilding the table.
proportion_table <- as.data.frame(unclass(proportion_table))

## Correlate the replicates across all clusters. The previous
## proportion_table[1:5, ] subset rows, so only the first five clusters
## contributed to the correlation.
chart.Correlation(proportion_table)

####### Resolution 1.5
## Re-cluster on principal components 1-20 at resolution 1.5.
seurat_campbell_chow <- FindClusters(
  object = seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Request the parameters for this resolution explicitly: the recorded output
## of the argument-less call reported "Resolution: 0.6" after this run.
PrintFindClustersParams(object = seurat_campbell_chow, resolution = 1.5)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from principal components 1-20.
seurat_campbell_chow <- RunTSNE(
  object = seurat_campbell_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## Labelled t-SNE, legend hidden.
## (Unlabelled alternative: TSNEPlot(object = seurat_campbell_chow))
TSNEPlot(
  object = seurat_campbell_chow,
  do.label = TRUE,
  no.legend = TRUE
)

## Clusters found for each variable-gene set:
##   4084 variable genes = 25 clusters
##   2346 variable genes = 24 clusters

## Cells per cluster.
table(seurat_campbell_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1603 1257 1061  764  712  671  651  641  581  546  343  320  304  251  209 
##   15   16   17   18   19   20   21   22 
##  207  188  165  136  111   74   58   44
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to five decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_chow@ident,
      seurat_campbell_chow@meta.data$replicate_name
    ),
    2
  ),
  5
)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.07179    0.27926    0.04277    0.29134    0.24927
##   1     0.15641    0.04616    0.17075    0.03675    0.05529
##   2     0.10969    0.06469    0.14108    0.04987    0.04753
##   3     0.04046    0.11541    0.03587    0.16010    0.09893
##   4     0.05584    0.06177    0.09107    0.06037    0.03783
##   5     0.04957    0.08355    0.05002    0.10761    0.05238
##   6     0.07151    0.03609    0.07244    0.03675    0.06305
##   7     0.07806    0.03316    0.08762    0.01575    0.00485
##   8     0.04615    0.06047    0.03242    0.03675    0.12124
##   9     0.04501    0.05104    0.05933    0.03150    0.04559
##   10    0.04074    0.02341    0.03794    0.00787    0.01455
##   11    0.02735    0.02958    0.02863    0.03937    0.03395
##   12    0.04359    0.01528    0.03277    0.01050    0.00485
##   13    0.02735    0.02438    0.02346    0.00787    0.00873
##   14    0.03020    0.00748    0.02518    0.00262    0.00582
##   15    0.02194    0.01983    0.01035    0.02625    0.02813
##   16    0.02165    0.01073    0.01587    0.01575    0.02619
##   17    0.01880    0.01040    0.01069    0.02100    0.02716
##   18    0.01396    0.01300    0.01414    0.01575    0.00000
##   19    0.00769    0.00293    0.00000    0.02362    0.06402
##   20    0.00969    0.00520    0.00586    0.00262    0.00582
##   21    0.00627    0.00488    0.00586    0.00000    0.00388
##   22    0.00627    0.00130    0.00586    0.00000    0.00097
## t-SNE with cells coloured by the replicate_name metadata column.
TSNEPlot(
  object = seurat_campbell_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster-by-replicate proportion table to a data frame.
## unclass() leaves a plain matrix that still carries the table's dimnames,
## so the replicate names become the column names without hard-coding the
## number of replicates or rebuilding the table.
proportion_table <- as.data.frame(unclass(proportion_table))

## Correlate the replicates across all clusters. The previous
## proportion_table[1:5, ] subset rows, so only the first five clusters
## contributed to the correlation.
chart.Correlation(proportion_table)

####### Resolution 2.0
## Re-cluster on principal components 1-20 at resolution 2.0.
seurat_campbell_chow <- FindClusters(
  object = seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Request the parameters for this resolution explicitly: the recorded output
## of the argument-less call reported "Resolution: 0.6" after this run.
PrintFindClustersParams(object = seurat_campbell_chow, resolution = 2.0)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from principal components 1-20.
seurat_campbell_chow <- RunTSNE(
  object = seurat_campbell_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## Labelled t-SNE, legend hidden.
## (Unlabelled alternative: TSNEPlot(object = seurat_campbell_chow))
TSNEPlot(
  object = seurat_campbell_chow,
  do.label = TRUE,
  no.legend = TRUE
)

## Clusters found for each variable-gene set:
##   4084 variable genes = 29 clusters
##   2346 variable genes = 28 clusters

## Cells per cluster.
table(seurat_campbell_chow@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 959 745 736 674 658 655 618 614 585 572 476 474 436 343 320 304 289 251 
##  18  19  20  21  22  23  24  25  26  27 
## 209 207 188 136 110 109  70  58  56  45
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to five decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_chow@ident,
      seurat_campbell_chow@meta.data$replicate_name
    ),
    2
  ),
  5
)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.03419    0.18108    0.02139    0.19423    0.14161
##   1     0.03875    0.11248    0.03484    0.16010    0.09796
##   2     0.08917    0.04486    0.07796    0.03675    0.04365
##   3     0.05185    0.06014    0.08486    0.06037    0.03686
##   4     0.07977    0.03544    0.08831    0.01575    0.00679
##   5     0.06496    0.05104    0.07278    0.04199    0.04171
##   6     0.06781    0.03316    0.06968    0.03150    0.06208
##   7     0.07977    0.00065    0.11142    0.00000    0.00873
##   8     0.03618    0.08778    0.03139    0.11286    0.05238
##   9     0.04587    0.05917    0.03174    0.03412    0.12027
##   10    0.05242    0.01821    0.07692    0.01312    0.00776
##   11    0.02792    0.07217    0.01690    0.08136    0.07177
##   12    0.03162    0.04649    0.03794    0.03412    0.05723
##   13    0.04074    0.02341    0.03794    0.00787    0.01455
##   14    0.02735    0.02958    0.02863    0.03937    0.03395
##   15    0.04359    0.01528    0.03277    0.01050    0.00485
##   16    0.02422    0.02958    0.02725    0.01050    0.02910
##   17    0.02735    0.02438    0.02346    0.00787    0.00873
##   18    0.03020    0.00748    0.02518    0.00262    0.00582
##   19    0.02194    0.01983    0.01035    0.02625    0.02813
##   20    0.02165    0.01073    0.01587    0.01575    0.02619
##   21    0.01396    0.01300    0.01414    0.01575    0.00000
##   22    0.00769    0.00293    0.00000    0.02362    0.06305
##   23    0.01026    0.00943    0.00586    0.01312    0.02134
##   24    0.00969    0.00455    0.00552    0.00262    0.00485
##   25    0.00627    0.00488    0.00586    0.00000    0.00388
##   26    0.00855    0.00098    0.00483    0.00787    0.00582
##   27    0.00627    0.00130    0.00621    0.00000    0.00097
## t-SNE with cells coloured by the replicate_name metadata column.
TSNEPlot(
  object = seurat_campbell_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

# install.packages("PerformanceAnalytics")

## Convert the cluster-by-replicate proportion table to a data frame.
## unclass() leaves a plain matrix that still carries the table's dimnames,
## so the replicate names become the column names without hard-coding the
## number of replicates or rebuilding the table.
proportion_table <- as.data.frame(unclass(proportion_table))

## Correlate the replicates across all clusters. The previous
## proportion_table[1:5, ] subset rows, so only the first five clusters
## contributed to the correlation.
chart.Correlation(proportion_table)

# cor(proportion_table[1],proportion_table[4])

## Final clustering: 1817 genes, 20 PCs, resolution = 0.6.
seurat_campbell_chow <- FindClusters(
  object = seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Recompute the t-SNE embedding on the same 20 PCs.
seurat_campbell_chow <- RunTSNE(
  object = seurat_campbell_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

Use 1817 genes, 20 principal components and a resolution of 0.6 to give 20 individual clusters. Fewer clusters are more likely to give meaningful, functionally distinct groups of cells.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

## Test each cluster against all remaining cells, keeping both positive and
## negative markers (only.pos = FALSE) with min.pct = 0.2.
seurat_campbell_chow_biomarkers <- FindAllMarkers(
  object = seurat_campbell_chow,
  only.pos = FALSE,
  min.pct = 0.2
)

## Within each cluster keep the 10 markers with the highest avg_logFC.
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_chow_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 200 x 7
## # Groups:   cluster [20]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 0.            0.823 0.94  0.53  0.        0       ENSMUSG00000047261
##  2 0.            0.772 0.859 0.445 0.        0       ENSMUSG00000027500
##  3 0.            0.716 0.982 0.701 0.        0       ENSMUSG00000055430
##  4 9.02e-250     0.740 0.822 0.445 2.71e-245 0       ENSMUSG00000044349
##  5 1.18e-248     0.957 0.757 0.432 3.54e-244 0       ENSMUSG00000060188
##  6 9.45e-220     0.723 0.935 0.717 2.83e-215 0       ENSMUSG00000021268
##  7 2.78e-202     0.689 0.734 0.407 8.35e-198 0       ENSMUSG00000000159
##  8 4.90e-159     0.645 0.561 0.269 1.47e-154 0       ENSMUSG00000066392
##  9 6.27e-139     1.46  0.382 0.156 1.88e-134 0       ENSMUSG00000021647
## 10 1.95e- 79     1.92  0.271 0.122 5.84e- 75 0       ENSMUSG00000020660
## # ... with 190 more rows
# write.csv(as.data.frame(seurat_campbell_chow_biomarkers), file = "seurat_campbell_chow_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_chow_biomarkers.csv", quote = FALSE)




## Same marker search using the ROC test (test.use = "roc"). This can take a
## long time.
seurat_campbell_chow_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_chow,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)

## Within each cluster keep the 10 markers with the highest avg_logFC.
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_chow_biomarkers_ROC, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 200 x 9
## # Groups:   cluster [20]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.783    0.823 0.566     0.823 0.94  0.53         NA 0       ENSMUSG00…
##  2 0.78     0.716 0.56      0.716 0.982 0.701        NA 0       ENSMUSG00…
##  3 0.755    0.614 0.51      0.614 0.987 0.697        NA 0       ENSMUSG00…
##  4 0.754    0.772 0.508     0.772 0.859 0.445        NA 0       ENSMUSG00…
##  5 0.717    0.740 0.434     0.740 0.822 0.445        NA 0       ENSMUSG00…
##  6 0.715    0.957 0.430     0.957 0.757 0.432        NA 0       ENSMUSG00…
##  7 0.714    0.723 0.428     0.723 0.935 0.717        NA 0       ENSMUSG00…
##  8 0.711    0.634 0.422     0.634 0.803 0.419        NA 0       ENSMUSG00…
##  9 0.705    0.644 0.410     0.644 0.763 0.403        NA 0       ENSMUSG00…
## 10 0.701    0.623 0.402     0.623 0.771 0.399        NA 0       ENSMUSG00…
## # ... with 190 more rows
# write.csv(as.data.frame(seurat_campbell_chow_biomarkers_ROC), file = "seurat_campbell_chow_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_chow_biomarkers_ROC.csv", quote = FALSE)

## Heatmap of the top-10 marker genes selected per cluster above.
DoHeatmap(
  object = seurat_campbell_chow,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)

Save seurat object of chow mice.

## save seurat object as .rds 
#saveRDS(seurat_campbell_chow, file = "./seurat_campbell_chow_final.rds")

EdgeR batch correction of CHOW fed mice

Analysis of Campbell scRNAseq data for Chow fed mice (5 replicates) using seurat with batch correction

Load CHOW mouse data

## Read the stored seurat object back from disk, then print its summary
seurat_campbell_batch_edgeR_chow <- readRDS("./seurat_campbell_chow_just_created.rds")

seurat_campbell_batch_edgeR_chow
## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 11255 samples.
# An object of class seurat in project CAMPBELL_CHOW 
#  30000 genes across 11255 samples.

Quality control on CHOW mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE, stringsAsFactors = FALSE)


## Make a character vector of mitochondrial gene IDs.
## This must not be a factor: a factor used as a row index is treated as its
## integer codes, which selects rows of the matrix by position instead of the
## genes carrying these IDs.
## NOTE(review): the column name suggests the IDs may carry URL text; if so
## they need cleaning before they can match the row names -- confirm.
mito_genes <- as.character(mito_genes$id_with_url)


## Identify which mitochondrially encoded gene IDs are present in my dataset
## by matching them against the row names of the raw count matrix:
mito_genes_present <- intersect(
  mito_genes,
  rownames(seurat_campbell_batch_edgeR_chow@raw.data)
)

## Fail loudly instead of computing a meaningless mitochondrial fraction
if (length(mito_genes_present) == 0) {
  stop("No IDs from mito_genes_table.csv match the row names of @raw.data", call. = FALSE)
}
## NOTE(review): the outputs recorded below, and the 0.004 percent_mito
## cut-off used later, were produced with the earlier position-based selection
## and should be regenerated and re-checked.

## Peek at the raw counts of the selected genes in the first five cells
seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, 1:5]
##                    SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000028                       1                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000142                       0                       0
## ENSMUSG00000000183                       0                       0
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000085                       0                       0
## ENSMUSG00000000171                       0                       0
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000157                       0                       0
## ENSMUSG00000000094                       0                       0
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000103                       0                       0
## ENSMUSG00000000182                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000154                       0                       0
## ENSMUSG00000000159                       0                       3
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000078                       0                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000088                       2                       0
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000028                       0                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000142                       0                       0
## ENSMUSG00000000183                       0                       0
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000085                       1                       0
## ENSMUSG00000000171                       0                       3
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000157                       0                       0
## ENSMUSG00000000094                       0                       0
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000103                       0                       0
## ENSMUSG00000000182                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000154                       0                       0
## ENSMUSG00000000159                       0                      11
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000078                       3                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000088                       0                       2
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164436_AAAACACTTCAT
## ENSMUSG00000000028                       0
## ENSMUSG00000000037                       0
## ENSMUSG00000000148                       0
## ENSMUSG00000000142                       0
## ENSMUSG00000000183                       0
## ENSMUSG00000000149                       0
## ENSMUSG00000000131                       2
## ENSMUSG00000000125                       0
## ENSMUSG00000000085                       1
## ENSMUSG00000000171                       1
## ENSMUSG00000000093                       0
## ENSMUSG00000000157                       0
## ENSMUSG00000000094                       0
## ENSMUSG00000000127                       0
## ENSMUSG00000000103                       0
## ENSMUSG00000000182                       0
## ENSMUSG00000000058                       0
## ENSMUSG00000000154                       0
## ENSMUSG00000000159                      14
## ENSMUSG00000000056                       0
## ENSMUSG00000000049                       0
## ENSMUSG00000000168                       3
## ENSMUSG00000000134                       0
## ENSMUSG00000000078                       0
## ENSMUSG00000000001                       0
## ENSMUSG00000000194                       0
## ENSMUSG00000000126                       0
## ENSMUSG00000000167                       0
## ENSMUSG00000000184                       0
## ENSMUSG00000000088                       2
## ENSMUSG00000000120                       0
## ENSMUSG00000000031                       0
## Dimensions of the selected-gene count matrix.
## drop = FALSE keeps the subset a matrix even when only one gene is selected.
dim(seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, , drop = FALSE])
## [1]    32 11255
## Calculate, per cell, the proportion (0-1, not a percentage) of raw counts
## that comes from the selected mitochondrial genes. drop = FALSE matters here:
## with a single selected gene the subset would otherwise collapse to a plain
## vector and Matrix::colSums() would fail.
percent_mito <- Matrix::colSums(
  seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, , drop = FALSE]
) / Matrix::colSums(seurat_campbell_batch_edgeR_chow@raw.data)


## Basic stats of the proportion of mitochondrial gene expression per cell
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0006089 0.0011086 0.0013382 0.0018298 0.0106698
## Store the per-cell mitochondrial proportion in the object's meta data
seurat_campbell_batch_edgeR_chow <- AddMetaData(
  seurat_campbell_batch_edgeR_chow,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## First rows of the seurat object meta data, now including percent_mito
head(seurat_campbell_batch_edgeR_chow@meta.data)
##                         nGene  nUMI orig.ident replicate_name percent_mito
## SRR5164436_AAAAAATGCATG  1158  2043 SRR5164436     SRR5164436 0.0014684288
## SRR5164436_AAAAACACGACG  2171  4531 SRR5164436     SRR5164436 0.0006623979
## SRR5164436_AAAAAGAAAAAT  1286  2154 SRR5164436     SRR5164436 0.0018578727
## SRR5164436_AAAAATGCACTA  3464  8755 SRR5164436     SRR5164436 0.0018289895
## SRR5164436_AAAACACTTCAT  3626 10143 SRR5164436     SRR5164436 0.0022691397
## SRR5164436_AAAACGAACATG  3775 10500 SRR5164436     SRR5164436 0.0012388031
## Violin plots for QC: genes per cell, UMIs per cell, mitochondrial proportion
VlnPlot(
  object = seurat_campbell_batch_edgeR_chow,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  point.size.use = 0.2,
  x.lab.rot = TRUE,
  nCol = 3
)

## QC scatter plots: nUMI against the mitochondrial proportion, and nUMI
## against the number of genes, laid out side by side.
par(mfrow = c(1, 2))
# Interactive (hover) variants, kept for reference:
# GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_batch_edgeR_chow,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  cex.use = 0.5,
  pch.use = 16
)

GenePlot(
  object = seurat_campbell_batch_edgeR_chow,
  gene1 = "nUMI",
  gene2 = "nGene",
  cex.use = 0.5,
  pch.use = 16
)

Filter cells after QC

Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 6000 genes expressed.

## Manual check of how many cells pass both upper thresholds
## (I already know all cells have >800 genes)
with(
  seurat_campbell_batch_edgeR_chow@meta.data,
  table(percent_mito < 0.004 & nGene < 6000)
)
## 
## FALSE  TRUE 
##   358 10897
#  From: 30000 genes across 11255 samples.
# To:
# FALSE  TRUE 
#  358 10897 
 
## Keep cells with nGene between 800 and 6000 and percent_mito below 0.004
## (i.e. 0.4%); percent_mito has no lower bound.
seurat_campbell_batch_edgeR_chow <- FilterCells(
  object = seurat_campbell_batch_edgeR_chow,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(6000, 0.004)
)

## Print the object summary to confirm the number of remaining cells
seurat_campbell_batch_edgeR_chow
## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 10897 samples.
# An object of class seurat in project CAMPBELL_CHOW 
# 30000 genes across 10897 samples.
# 358 cells are filtered out; numbers consistent with above

Log normalise gene expression per cell

## Histogram of per-cell total expression prior to normalisation
hist(
  colSums(seurat_campbell_batch_edgeR_chow@data),
  main = "Total expression before normalisation",
  xlab = "Sum of expression",
  breaks = 100
)

## Log-normalise gene expression within each cell
seurat_campbell_batch_edgeR_chow <- NormalizeData(
  object = seurat_campbell_batch_edgeR_chow,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)

## Same histogram once the data has been normalised
hist(
  colSums(seurat_campbell_batch_edgeR_chow@data),
  main = "Total expression after normalisation",
  xlab = "Sum of expression",
  breaks = 100
)

## Examine the first 10 genes x 10 cells of the normalised data:
seurat_campbell_batch_edgeR_chow@data[1:10, 1:10]
## 10 x 10 sparse Matrix of class "dgCMatrix"
##    [[ suppressing 10 column names 'SRR5164436_AAAAAATGCATG', 'SRR5164436_AAAAACACGACG', 'SRR5164436_AAAAAGAAAAAT' ... ]]
##                                                                     
## ENSMUSG00000000001 .        . .        .        .         .        .
## ENSMUSG00000000028 1.774064 . .        .        .         .        .
## ENSMUSG00000000031 .        . .        .        .         .        .
## ENSMUSG00000000037 .        . .        .        .         .        .
## ENSMUSG00000000049 .        . .        .        .         .        .
## ENSMUSG00000000056 .        . .        .        .         .        .
## ENSMUSG00000000058 .        . .        .        .         1.066726 .
## ENSMUSG00000000078 .        . 2.703644 .        .         1.904724 .
## ENSMUSG00000000085 .        . 1.730714 .        0.6864158 .        .
## ENSMUSG00000000088 2.378576 . .        1.189743 1.0896270 1.066726 .
##                                       
## ENSMUSG00000000001 .        .        .
## ENSMUSG00000000028 .        .        .
## ENSMUSG00000000031 .        .        .
## ENSMUSG00000000037 .        .        .
## ENSMUSG00000000049 .        .        .
## ENSMUSG00000000056 1.709448 .        .
## ENSMUSG00000000058 .        .        .
## ENSMUSG00000000078 .        .        .
## ENSMUSG00000000085 .        .        .
## ENSMUSG00000000088 .        1.160955 .

Perform batch correction using edgeR

Note: the paper describes removeBatchEffect() as an edgeR function, but it is actually provided by the limma package.

# Optional before/after PCA comparison (disabled):
# par(mfrow=c(1,2))
# do_PCA(seurat_campbell_batch_edgeR_chow@data, plot_title="before removeBatchEffect()")

## Remove the replicate (batch) effect from the normalised expression values,
## using the replicate_name meta data column as the batch label. The result
## overwrites the @data slot.
seurat_campbell_batch_edgeR_chow@data <- removeBatchEffect(
  seurat_campbell_batch_edgeR_chow@data,
  batch = seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
)
## Inspect the first 10 genes x 10 cells after correction
seurat_campbell_batch_edgeR_chow@data[1:10, 1:10]
##                    SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            1.7742737394            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088            2.3764941945           -0.0020815808
##                    SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078            2.6965025860           -0.0071409545
## ENSMUSG00000000085            1.7273184316           -0.0033954029
## ENSMUSG00000000088           -0.0020815808            1.1876615160
##                    SRR5164436_AAAACACTTCAT SRR5164436_AAAACGAACATG
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577            1.0664999199
## ENSMUSG00000000078           -0.0071409545            1.8975829800
## ENSMUSG00000000085            0.6830204120           -0.0033954029
## ENSMUSG00000000088            1.0875454466            1.0646446969
##                    SRR5164436_AAAACGACTCAA SRR5164436_AAAACGACTCAC
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079            1.7055505837
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088           -0.0020815808           -0.0020815808
##                    SRR5164436_AAAACGACTCAG SRR5164436_AAAACGACTCAT
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088            1.1588738384           -0.0020815808
# do_PCA(seurat_campbell_batch_edgeR_chow@data, plot_title="after removeBatchEffect()")

Find genes whose expression varies between cells.

Find genes whose expression varies between cells; these genes are used to construct the principal components on which the cells are then clustered.

## Select variable genes from the mean/dispersion relationship
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(
  object = seurat_campbell_batch_edgeR_chow,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  binning.method = "equal_width",
  num.bin = 20,
  x.low.cutoff = 0.05,
  x.high.cutoff = 4,
  y.cutoff = 0.75
)

# how many variable genes were selected
length(seurat_campbell_batch_edgeR_chow@var.genes)
## [1] 1645

seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 1645 variable genes

seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 2121 variable genes

seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width")

                      == 3463 variable genes

seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 300, binning.method = "equal_width")

                      == 2631 variable genes

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI and percent_mito and regressing them out. Mouse replicate is not regressed out here, on the assumption that it has already been corrected for using limma.

## Scale the data, regressing out nUMI and percent_mito only.
## Mouse replicate is NOT included in vars.to.regress.
seurat_campbell_batch_edgeR_chow <- ScaleData(object = seurat_campbell_batch_edgeR_chow, vars.to.regress = c("nUMI", "percent_mito"))
## Regressing out: nUMI, percent_mito
## 
## Time Elapsed:  49.757223367691 secs
## Scaling data matrix
## NOTE(review): this prints the @data slot. The values recorded below are
## identical to those printed right after removeBatchEffect(), so @data is not
## modified by ScaleData; presumably the scaled values are stored in
## @scale.data and that slot was meant to be inspected here -- confirm.
seurat_campbell_batch_edgeR_chow@data[1:10,1:10]
##                    SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            1.7742737394            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088            2.3764941945           -0.0020815808
##                    SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078            2.6965025860           -0.0071409545
## ENSMUSG00000000085            1.7273184316           -0.0033954029
## ENSMUSG00000000088           -0.0020815808            1.1876615160
##                    SRR5164436_AAAACACTTCAT SRR5164436_AAAACGAACATG
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577            1.0664999199
## ENSMUSG00000000078           -0.0071409545            1.8975829800
## ENSMUSG00000000085            0.6830204120           -0.0033954029
## ENSMUSG00000000088            1.0875454466            1.0646446969
##                    SRR5164436_AAAACGACTCAA SRR5164436_AAAACGACTCAC
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079            1.7055505837
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088           -0.0020815808           -0.0020815808
##                    SRR5164436_AAAACGACTCAG SRR5164436_AAAACGACTCAT
## ENSMUSG00000000001           -0.0106040862           -0.0106040862
## ENSMUSG00000000028            0.0002094769            0.0002094769
## ENSMUSG00000000031           -0.0016074305           -0.0016074305
## ENSMUSG00000000037            0.0011305821            0.0011305821
## ENSMUSG00000000049           -0.0011731937           -0.0011731937
## ENSMUSG00000000056           -0.0038975079           -0.0038975079
## ENSMUSG00000000058           -0.0002263577           -0.0002263577
## ENSMUSG00000000078           -0.0071409545           -0.0071409545
## ENSMUSG00000000085           -0.0033954029           -0.0033954029
## ENSMUSG00000000088            1.1588738384           -0.0020815808

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA on the variable genes, printing 5 genes for each of PCs 1 to 5
seurat_campbell_batch_edgeR_chow <- RunPCA(
  object = seurat_campbell_batch_edgeR_chow,
  pc.genes = seurat_campbell_batch_edgeR_chow@var.genes,
  genes.print = 5,
  pcs.print = 1:5,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000026701" "ENSMUSG00000000567"
## [1] ""
## [1] "ENSMUSG00000050711" "ENSMUSG00000027350" "ENSMUSG00000027273"
## [4] "ENSMUSG00000024261" "ENSMUSG00000000159"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000060802"
## [4] "ENSMUSG00000036896" "ENSMUSG00000036887"
## [1] ""
## [1] "ENSMUSG00000067786" "ENSMUSG00000055254" "ENSMUSG00000031760"
## [4] "ENSMUSG00000037852" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634"
## [1] ""
## [1] "ENSMUSG00000027447" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000030579"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800"
## [1] ""
## [1] ""
## Top genes associated with principal components 1 to 9
VizPCA(
  object = seurat_campbell_batch_edgeR_chow,
  pcs.use = 1:9
)

## Scatter plot: principal component 1 vs 2
PCAPlot(
  object = seurat_campbell_batch_edgeR_chow,
  dim.1 = 1,
  dim.2 = 2
)

## Scatter plot: principal component 2 vs 3
PCAPlot(
  object = seurat_campbell_batch_edgeR_chow,
  dim.1 = 2,
  dim.2 = 3
)

## Heatmap of gene expression for the genes of principal component 1
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 1,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 2
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 2,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 3
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 3,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 4
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 4,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 5
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 5,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!

## One heatmap per principal component, for PCs 6 to 20
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 6:20,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)

## Project the existing PCA (computed on the variable genes found earlier)
## onto the entire dataset (all genes), printing the top genes per PC
seurat_campbell_batch_edgeR_chow <- ProjectPCA(
  object = seurat_campbell_batch_edgeR_chow,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
##  [4] "ENSMUSG00000026701" "ENSMUSG00000000567" "ENSMUSG00000050953"
##  [7] "ENSMUSG00000018593" "ENSMUSG00000001025" "ENSMUSG00000031762"
## [10] "ENSMUSG00000035805" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [13] "ENSMUSG00000095538" "ENSMUSG00000022528" "ENSMUSG00000058135"
## [16] "ENSMUSG00000005360" "ENSMUSG00000018102" "ENSMUSG00000027712"
## [19] "ENSMUSG00000029838" "ENSMUSG00000021250" "ENSMUSG00000030342"
## [22] "ENSMUSG00000032231" "ENSMUSG00000034467" "ENSMUSG00000008540"
## [25] "ENSMUSG00000053931" "ENSMUSG00000036570" "ENSMUSG00000026649"
## [28] "ENSMUSG00000063564" "ENSMUSG00000055254" "ENSMUSG00000017009"
## [1] ""
##  [1] "ENSMUSG00000033061" "ENSMUSG00000050711" "ENSMUSG00000047261"
##  [4] "ENSMUSG00000027581" "ENSMUSG00000055430" "ENSMUSG00000019986"
##  [7] "ENSMUSG00000026576" "ENSMUSG00000024268" "ENSMUSG00000021087"
## [10] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000040785"
## [13] "ENSMUSG00000042750" "ENSMUSG00000027500" "ENSMUSG00000029223"
## [16] "ENSMUSG00000022577" "ENSMUSG00000036699" "ENSMUSG00000027350"
## [19] "ENSMUSG00000059361" "ENSMUSG00000019923" "ENSMUSG00000044349"
## [22] "ENSMUSG00000035964" "ENSMUSG00000027273" "ENSMUSG00000018965"
## [25] "ENSMUSG00000071658" "ENSMUSG00000039278" "ENSMUSG00000024261"
## [28] "ENSMUSG00000031840" "ENSMUSG00000024423" "ENSMUSG00000000159"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000060802"
##  [4] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000079018"
##  [7] "ENSMUSG00000075602" "ENSMUSG00000029484" "ENSMUSG00000030579"
## [10] "ENSMUSG00000028581" "ENSMUSG00000058715" "ENSMUSG00000036256"
## [13] "ENSMUSG00000026365" "ENSMUSG00000029622" "ENSMUSG00000022584"
## [16] "ENSMUSG00000024621" "ENSMUSG00000064373" "ENSMUSG00000024397"
## [19] "ENSMUSG00000056492" "ENSMUSG00000021423" "ENSMUSG00000023992"
## [22] "ENSMUSG00000015852" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [25] "ENSMUSG00000040584" "ENSMUSG00000020154" "ENSMUSG00000016494"
## [28] "ENSMUSG00000041378" "ENSMUSG00000046805" "ENSMUSG00000030237"
## [1] ""
##  [1] "ENSMUSG00000018451" "ENSMUSG00000021270" "ENSMUSG00000067786"
##  [4] "ENSMUSG00000055254" "ENSMUSG00000026223" "ENSMUSG00000031428"
##  [7] "ENSMUSG00000052727" "ENSMUSG00000037926" "ENSMUSG00000039278"
## [10] "ENSMUSG00000101111" "ENSMUSG00000031760" "ENSMUSG00000037852"
## [13] "ENSMUSG00000031633" "ENSMUSG00000026701" "ENSMUSG00000055430"
## [16] "ENSMUSG00000021379" "ENSMUSG00000019986" "ENSMUSG00000040785"
## [19] "ENSMUSG00000035805" "ENSMUSG00000046432" "ENSMUSG00000017390"
## [22] "ENSMUSG00000042750" "ENSMUSG00000034467" "ENSMUSG00000079037"
## [25] "ENSMUSG00000025666" "ENSMUSG00000015222" "ENSMUSG00000021087"
## [28] "ENSMUSG00000095538" "ENSMUSG00000026649" "ENSMUSG00000064357"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
##  [4] "ENSMUSG00000076439" "ENSMUSG00000036634" "ENSMUSG00000026830"
##  [7] "ENSMUSG00000031775" "ENSMUSG00000006782" "ENSMUSG00000046160"
## [10] "ENSMUSG00000032060" "ENSMUSG00000050121" "ENSMUSG00000033579"
## [13] "ENSMUSG00000022425" "ENSMUSG00000032517" "ENSMUSG00000027375"
## [16] "ENSMUSG00000041607" "ENSMUSG00000020486" "ENSMUSG00000027562"
## [19] "ENSMUSG00000040759" "ENSMUSG00000027858" "ENSMUSG00000013523"
## [22] "ENSMUSG00000073680" "ENSMUSG00000022090" "ENSMUSG00000026888"
## [25] "ENSMUSG00000037166" "ENSMUSG00000027199" "ENSMUSG00000043448"
## [28] "ENSMUSG00000028412" "ENSMUSG00000011884" "ENSMUSG00000090996"
## [1] ""
##  [1] "ENSMUSG00000027447" "ENSMUSG00000036896" "ENSMUSG00000036887"
##  [4] "ENSMUSG00000036905" "ENSMUSG00000018451" "ENSMUSG00000030579"
##  [7] "ENSMUSG00000058715" "ENSMUSG00000028581" "ENSMUSG00000024621"
## [10] "ENSMUSG00000024397" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [13] "ENSMUSG00000022587" "ENSMUSG00000015852" "ENSMUSG00000038642"
## [16] "ENSMUSG00000026576" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [19] "ENSMUSG00000008682" "ENSMUSG00000021268" "ENSMUSG00000048163"
## [22] "ENSMUSG00000040747" "ENSMUSG00000060802" "ENSMUSG00000059498"
## [25] "ENSMUSG00000050711" "ENSMUSG00000030786" "ENSMUSG00000021665"
## [28] "ENSMUSG00000033061" "ENSMUSG00000030844" "ENSMUSG00000000682"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
##  [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000040584"
##  [7] "ENSMUSG00000020154" "ENSMUSG00000041378" "ENSMUSG00000030237"
## [10] "ENSMUSG00000030235" "ENSMUSG00000061353" "ENSMUSG00000029648"
## [13] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000033960"
## [16] "ENSMUSG00000031239" "ENSMUSG00000039349" "ENSMUSG00000024140"
## [19] "ENSMUSG00000020717" "ENSMUSG00000006386" "ENSMUSG00000114487"
## [22] "ENSMUSG00000042116" "ENSMUSG00000031871" "ENSMUSG00000027435"
## [25] "ENSMUSG00000019966" "ENSMUSG00000034738" "ENSMUSG00000062960"
## [28] "ENSMUSG00000045954" "ENSMUSG00000040732" "ENSMUSG00000039831"
## [1] ""
##  [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
##  [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000024621"
##  [7] "ENSMUSG00000021423" "ENSMUSG00000028581" "ENSMUSG00000024397"
## [10] "ENSMUSG00000038642" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000046805" "ENSMUSG00000048163"
## [16] "ENSMUSG00000030786" "ENSMUSG00000059498" "ENSMUSG00000040747"
## [19] "ENSMUSG00000000682" "ENSMUSG00000021665" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040229" "ENSMUSG00000036908" "ENSMUSG00000018008"
## [25] "ENSMUSG00000030844" "ENSMUSG00000069516" "ENSMUSG00000044811"
## [28] "ENSMUSG00000020377" "ENSMUSG00000089929" "ENSMUSG00000052336"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
##  [4] "ENSMUSG00000007682" "ENSMUSG00000034810" "ENSMUSG00000055653"
##  [7] "ENSMUSG00000029838" "ENSMUSG00000022528" "ENSMUSG00000045005"
## [10] "ENSMUSG00000063564" "ENSMUSG00000030629" "ENSMUSG00000022816"
## [13] "ENSMUSG00000026185" "ENSMUSG00000017493" "ENSMUSG00000052387"
## [16] "ENSMUSG00000047786" "ENSMUSG00000024518" "ENSMUSG00000021250"
## [19] "ENSMUSG00000017390" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [22] "ENSMUSG00000037206" "ENSMUSG00000030111" "ENSMUSG00000064370"
## [25] "ENSMUSG00000034640" "ENSMUSG00000008540" "ENSMUSG00000027239"
## [28] "ENSMUSG00000061718" "ENSMUSG00000093460" "ENSMUSG00000038418"
## [1] ""
##  [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
##  [4] "ENSMUSG00000027800" "ENSMUSG00000094800" "ENSMUSG00000044772"
##  [7] "ENSMUSG00000027744" "ENSMUSG00000032595" "ENSMUSG00000072674"
## [10] "ENSMUSG00000110332" "ENSMUSG00000045655" "ENSMUSG00000041323"
## [13] "ENSMUSG00000047139" "ENSMUSG00000108841" "ENSMUSG00000095304"
## [16] "ENSMUSG00000022037" "ENSMUSG00000047394" "ENSMUSG00000029182"
## [19] "ENSMUSG00000046242" "ENSMUSG00000072473" "ENSMUSG00000047671"
## [22] "ENSMUSG00000044475" "ENSMUSG00000027360" "ENSMUSG00000020473"
## [25] "ENSMUSG00000028441" "ENSMUSG00000026683" "ENSMUSG00000038135"
## [28] "ENSMUSG00000026301" "ENSMUSG00000091345" "ENSMUSG00000027867"
## [1] ""
## [1] ""

Determine statistically significant principal components

## Run the JackStraw test (100 replicates) to assess which principal
## components are statistically significant; a progress bar is shown.
seurat_campbell_batch_edgeR_chow <- JackStraw(
  object = seurat_campbell_batch_edgeR_chow,
  num.replicate = 100,
  display.progress = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  6.37038615544637 mins
# Maximum number of PCs allowed = 20.


## Plot the JackStraw results for PCs 1 to 20
JackStrawPlot(
  object = seurat_campbell_batch_edgeR_chow,
  PCs = 1:20
)
## Warning: Removed 23020 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_CHOW 
##  30000 genes across 10897 samples.
## An elbow plot is a less computationally intensive heuristic for choosing
## how many PCs to treat as significant.
PCElbowPlot(
  object = seurat_campbell_batch_edgeR_chow
)

Using the limma batch-corrected data, 20 PCs are still significant.

Cell clustering

## Cluster cells on PCs 1-20 at resolution 0.6.
## force.recalc = TRUE requests recalculation; save.SNN = TRUE keeps the SNN.
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1-20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## t-SNE plot with labels drawn and the legend hidden
# TSNEPlot(object = seurat_campbell_batch_edgeR_chow)
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  no.legend = TRUE,
  do.label = TRUE
)

## 1645 variable genes = 19 clusters

## Number of cells assigned to each cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 3612 1755 1398  613  553  513  343  323  303  263  250  210  188  165  135 
##   15   16   17   18 
##  107   64   58   44
## Cross-tabulate cluster identity (rows) against replicate name (columns)
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
)

## Turn counts into per-replicate (column-wise) proportions, rounded to 5 d.p.
proportion_table <- round(prop.table(proportion_table, margin = 2), digits = 5)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.28433    0.41417    0.27423    0.43832    0.36663
##   1     0.16553    0.12386    0.23042    0.09974    0.08438
##   2     0.11795    0.14759    0.12349    0.17585    0.10184
##   3     0.06838    0.03251    0.06830    0.03150    0.06111
##   4     0.04615    0.05202    0.05795    0.03412    0.04850
##   5     0.03989    0.05657    0.02415    0.03675    0.11154
##   6     0.04046    0.02341    0.03829    0.00787    0.01455
##   7     0.02764    0.02991    0.02932    0.03675    0.03395
##   8     0.04330    0.01528    0.03277    0.01050    0.00485
##   9     0.02507    0.02568    0.01656    0.04199    0.03104
##   10    0.02707    0.02438    0.02346    0.00787    0.00873
##   11    0.03048    0.00748    0.02518    0.00262    0.00582
##   12    0.02137    0.01073    0.01621    0.01575    0.02619
##   13    0.01880    0.01040    0.01069    0.02100    0.02716
##   14    0.01425    0.01300    0.01345    0.01575    0.00000
##   15    0.00712    0.00260    0.00000    0.02100    0.06402
##   16    0.00969    0.00423    0.00379    0.00262    0.00485
##   17    0.00627    0.00488    0.00586    0.00000    0.00388
##   18    0.00627    0.00130    0.00586    0.00000    0.00097
## t-SNE plot grouped by replicate_name, with legend and without labels
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster-by-replicate proportion table into a wide data frame.
## as.data.frame.matrix() carries the replicate names over as column names,
## so the number of replicates no longer has to be hard-coded (was ncol = 5).
proportion_table <- as.data.frame.matrix(proportion_table)

## Pairwise correlation of cluster proportions between replicates.
## All clusters (rows) are used: the previous `[1:5, ]` subset selected the
## first five rows (clusters) rather than the five replicate columns, so each
## correlation was estimated from only five points.
chart.Correlation(proportion_table)

#######
## Re-cluster on PCs 1-20 at resolution 1.0 (recalculation forced)
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1-20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## t-SNE plot with labels drawn and the legend hidden
# TSNEPlot(object = seurat_campbell_batch_edgeR_chow)
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  no.legend = TRUE,
  do.label = TRUE
)

##  1645 variable genes = 20 clusters


## Number of cells assigned to each cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 2075 1730 1483 1397  615  593  529  343  322  303  286  250  210  188  165 
##   15   16   17   18   19 
##  135  107   64   58   44
## Cross-tabulate cluster identity (rows) against replicate name (columns)
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
)

## Turn counts into per-replicate (column-wise) proportions, rounded to 5 d.p.
proportion_table <- round(prop.table(proportion_table, margin = 2), digits = 5)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.11140    0.34590    0.08037    0.38845    0.23181
##   1     0.16439    0.12126    0.22732    0.09974    0.08050
##   2     0.16752    0.06307    0.18800    0.05249    0.13191
##   3     0.11795    0.14759    0.12315    0.17585    0.10184
##   4     0.06838    0.03251    0.06864    0.03150    0.06208
##   5     0.04957    0.05754    0.06106    0.03412    0.05044
##   6     0.04188    0.05624    0.02760    0.03412    0.11251
##   7     0.04046    0.02341    0.03829    0.00787    0.01455
##   8     0.02764    0.02991    0.02898    0.03675    0.03395
##   9     0.04330    0.01528    0.03277    0.01050    0.00485
##   10    0.02621    0.02828    0.01932    0.04199    0.03395
##   11    0.02707    0.02438    0.02346    0.00787    0.00873
##   12    0.03048    0.00748    0.02518    0.00262    0.00582
##   13    0.02137    0.01073    0.01621    0.01575    0.02619
##   14    0.01880    0.01040    0.01069    0.02100    0.02716
##   15    0.01425    0.01300    0.01345    0.01575    0.00000
##   16    0.00712    0.00260    0.00000    0.02100    0.06402
##   17    0.00969    0.00423    0.00379    0.00262    0.00485
##   18    0.00627    0.00488    0.00586    0.00000    0.00388
##   19    0.00627    0.00130    0.00586    0.00000    0.00097
## t-SNE plot grouped by replicate_name, with legend and without labels
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster-by-replicate proportion table into a wide data frame.
## as.data.frame.matrix() carries the replicate names over as column names,
## so the number of replicates no longer has to be hard-coded (was ncol = 5).
proportion_table <- as.data.frame.matrix(proportion_table)

## Pairwise correlation of cluster proportions between replicates.
## All clusters (rows) are used: the previous `[1:5, ]` subset selected the
## first five rows (clusters) rather than the five replicate columns, so each
## correlation was estimated from only five points.
chart.Correlation(proportion_table)

#######
## Re-cluster on PCs 1-20 at resolution 1.5 (recalculation forced)
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1-20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## t-SNE plot with labels drawn and the legend hidden
# TSNEPlot(object = seurat_campbell_batch_edgeR_chow)
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  no.legend = TRUE,
  do.label = TRUE
)

##  1645 variable genes = 24 clusters


## Number of cells assigned to each cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1741 1071  934  763  742  622  609  607  600  547  343  328  303  256  250 
##   15   16   17   18   19   20   21   22   23 
##  210  209  188  165  136  107   64   58   44
## Cross-tabulate cluster identity (rows) against replicate name (columns)
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
)

## Turn counts into per-replicate (column-wise) proportions, rounded to 5 d.p.
proportion_table <- round(prop.table(proportion_table, margin = 2), digits = 5)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.09345    0.29746    0.06278    0.30446    0.19399
##   1     0.10940    0.06534    0.14384    0.05249    0.04753
##   2     0.12479    0.00878    0.14453    0.01575    0.04268
##   3     0.03989    0.11834    0.03518    0.16273    0.09214
##   4     0.05983    0.06404    0.09314    0.06037    0.04074
##   5     0.07778    0.02731    0.08658    0.01312    0.00873
##   6     0.06781    0.03218    0.06795    0.03150    0.06111
##   7     0.04644    0.06144    0.04795    0.04462    0.09602
##   8     0.05043    0.05787    0.06209    0.03412    0.05044
##   9     0.04217    0.05819    0.02863    0.03937    0.11833
##   10    0.04046    0.02341    0.03829    0.00787    0.01455
##   11    0.02764    0.03088    0.02967    0.03675    0.03492
##   12    0.04330    0.01528    0.03277    0.01050    0.00485
##   13    0.01339    0.03999    0.01138    0.07087    0.02522
##   14    0.02707    0.02438    0.02346    0.00787    0.00873
##   15    0.03048    0.00748    0.02518    0.00262    0.00582
##   16    0.02194    0.02048    0.01035    0.02887    0.02716
##   17    0.02137    0.01073    0.01621    0.01575    0.02619
##   18    0.01880    0.01040    0.01069    0.02100    0.02716
##   19    0.01425    0.01300    0.01380    0.01575    0.00000
##   20    0.00712    0.00260    0.00000    0.02100    0.06402
##   21    0.00969    0.00423    0.00379    0.00262    0.00485
##   22    0.00627    0.00488    0.00586    0.00000    0.00388
##   23    0.00627    0.00130    0.00586    0.00000    0.00097
## t-SNE coloured by replicate (legend shown, no labels) to inspect batch effects
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

## Convert the cluster x replicate proportion table into a data frame with one
## column per replicate and one row per cluster. as.data.frame.matrix() keeps
## the table's shape and dimnames, so the number of replicates is not
## hard-coded and the column names do not have to be rebuilt from a second
## table() call.
proportion_table <- as.data.frame.matrix(proportion_table)

## Pairwise correlation between replicates of the per-cluster cell proportions.
## All clusters (rows) are used: the previous `[1:5, ]` subset kept only the
## first five clusters, so the correlations ignored most of the data.
chart.Correlation(proportion_table)

#######
## Re-cluster at a higher resolution (2.0) on PCA dimensions 1 to 20,
## forcing recalculation and saving the SNN graph
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Report the parameters recorded for the latest FindClusters run
PrintFindClustersParams(object = seurat_campbell_batch_edgeR_chow)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from dimensions 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

## Draw the t-SNE with cluster labels on the plot and no legend
# TSNEPlot(object = seurat_campbell_batch_edgeR_chow)
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  no.legend = TRUE,
  do.label = TRUE
)

##  1645 variable genes = 29 clusters


## Number of cells assigned to each cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 924 902 880 870 758 648 622 613 605 501 343 325 310 303 290 286 273 250 
##  18  19  20  21  22  23  24  25  26  27  28 
## 222 188 170 136 110 107  64  58  55  44  40
## Cross-tabulate cluster identity (rows) against replicate (columns), convert
## the counts to within-replicate proportions (margin 2 = columns) and round
## to 5 decimal places.
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
) %>%
  prop.table(2) %>%
  round(5)

proportion_table
##     
##      SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
##   0     0.12251    0.00975    0.14315    0.01837    0.04074
##   1     0.05071    0.15085    0.03587    0.12861    0.10378
##   2     0.04672    0.14824    0.03036    0.18373    0.09893
##   3     0.08405    0.05559    0.12660    0.03412    0.02328
##   4     0.03960    0.11736    0.03518    0.16273    0.09117
##   5     0.06382    0.04909    0.07347    0.03937    0.04365
##   6     0.07778    0.02731    0.08658    0.01312    0.00873
##   7     0.04701    0.06339    0.04795    0.03937    0.09602
##   8     0.06695    0.03218    0.06761    0.03150    0.06111
##   9     0.03989    0.05267    0.02518    0.02887    0.11154
##   10    0.04046    0.02341    0.03829    0.00787    0.01455
##   11    0.02764    0.03023    0.02932    0.03675    0.03492
##   12    0.02593    0.02893    0.03484    0.02362    0.01940
##   13    0.04330    0.01528    0.03277    0.01050    0.00485
##   14    0.02422    0.02893    0.02691    0.01050    0.03298
##   15    0.02621    0.02828    0.01932    0.04199    0.03395
##   16    0.01396    0.04259    0.01345    0.07612    0.02425
##   17    0.02707    0.02438    0.02346    0.00787    0.00873
##   18    0.01795    0.01691    0.02829    0.02625    0.01455
##   19    0.02137    0.01073    0.01621    0.01575    0.02619
##   20    0.02593    0.00423    0.02035    0.00262    0.00582
##   21    0.01425    0.01300    0.01380    0.01575    0.00000
##   22    0.01026    0.00975    0.00586    0.01312    0.02134
##   23    0.00712    0.00260    0.00000    0.02100    0.06402
##   24    0.00969    0.00423    0.00379    0.00262    0.00485
##   25    0.00627    0.00488    0.00586    0.00000    0.00388
##   26    0.00855    0.00065    0.00483    0.00787    0.00582
##   27    0.00627    0.00130    0.00586    0.00000    0.00097
##   28    0.00456    0.00325    0.00483    0.00000    0.00000
## t-SNE coloured by replicate (legend shown, no labels) to inspect batch effects
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)

# install.packages("PerformanceAnalytics")

## Convert the cluster x replicate proportion table into a data frame with one
## column per replicate and one row per cluster. as.data.frame.matrix() keeps
## the table's shape and dimnames, so the number of replicates is not
## hard-coded and the column names do not have to be rebuilt from a second
## table() call.
proportion_table <- as.data.frame.matrix(proportion_table)

## Pairwise correlation between replicates of the per-cluster cell proportions.
## All clusters (rows) are used: the previous `[1:5, ]` subset kept only the
## first five clusters, so the correlations ignored most of the data.
chart.Correlation(proportion_table)

# cor(proportion_table[1],proportion_table[4])

## Final clustering: 1645 genes, 20 PCs, resolution = 0.6
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)


## Recompute the t-SNE embedding for the final clustering
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)

Using 1645 genes, 20 principal components and a resolution of 0.6 gives 19 individual clusters. Although there is good correlation between the proportion of cells in each cluster from each experimental batch at a resolution of 0.6, batch effects are still visible. The results of using the limma batch-correction feature are therefore similar to those of the Seurat v2.0 ScaleData batch correction (possibly because both use a linear model), so there is no benefit to using the limma feature as in the Campbell paper.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

## Differential expression of each cluster versus all remaining cells;
## only.pos = FALSE keeps both positive and negative markers.
seurat_campbell_batch_edgeR_chow_biomarkers <- FindAllMarkers(
  object = seurat_campbell_batch_edgeR_chow,
  only.pos = FALSE,
  min.pct = 0.2
)


## Per cluster, keep the top 10 rows ranked by avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_batch_edgeR_chow_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 190 x 7
## # Groups:   cluster [19]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 0.            1.01  0.948 0.842 0.        0       ENSMUSG00000027500
##  2 0.            0.993 0.976 0.875 0.        0       ENSMUSG00000047261
##  3 0.            0.915 0.995 0.903 0.        0       ENSMUSG00000055430
##  4 6.76e-261     0.865 0.779 0.562 2.03e-256 0       ENSMUSG00000048978
##  5 3.10e-256     0.913 0.946 0.921 9.31e-252 0       ENSMUSG00000066392
##  6 4.94e- 75     0.960 0.686 0.683 1.48e- 70 0       ENSMUSG00000036357
##  7 1.47e- 50     1.56  0.744 0.763 4.40e- 46 0       ENSMUSG00000021647
##  8 4.69e- 13     2.48  0.697 0.756 1.41e-  8 0       ENSMUSG00000020660
##  9 1.27e- 12     1.56  0.54  0.474 3.81e-  8 0       ENSMUSG00000032291
## 10 1.26e-  4     1.41  0.858 0.895 1.00e+  0 0       ENSMUSG00000037727
## # ... with 180 more rows
## Export the full marker table and the per-cluster top 10 as unquoted CSV
write.csv(
  as.data.frame(seurat_campbell_batch_edgeR_chow_biomarkers),
  file = "seurat_campbell_batch_edgeR_chow_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_batch_edgeR_chow_biomarkers.csv",
  quote = FALSE
)




## Repeat the marker search with the ROC test (test.use = "roc").
## This can take a long time.
seurat_campbell_batch_edgeR_chow_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_batch_edgeR_chow,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)

## Per cluster, keep the top 10 rows ranked by avg_logFC
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_batch_edgeR_chow_biomarkers_ROC, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 190 x 9
## # Groups:   cluster [19]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.82     0.915 0.640     0.915 0.995 0.903        NA 0       ENSMUSG00…
##  2 0.808    0.993 0.616     0.993 0.976 0.875        NA 0       ENSMUSG00…
##  3 0.8      0.799 0.6       0.799 0.993 0.89         NA 0       ENSMUSG00…
##  4 0.787    1.01  0.574     1.01  0.948 0.842        NA 0       ENSMUSG00…
##  5 0.76     0.762 0.52      0.762 0.979 0.892        NA 0       ENSMUSG00…
##  6 0.733    0.749 0.466     0.749 0.939 0.855        NA 0       ENSMUSG00…
##  7 0.73     0.776 0.46      0.776 0.916 0.84         NA 0       ENSMUSG00…
##  8 0.721    0.796 0.442     0.796 0.991 0.953        NA 0       ENSMUSG00…
##  9 0.714    0.779 0.428     0.779 0.962 0.938        NA 0       ENSMUSG00…
## 10 0.701    0.865 0.402     0.865 0.779 0.562        NA 0       ENSMUSG00…
## # ... with 180 more rows
## Export the ROC marker table and its per-cluster top 10 as unquoted CSV
write.csv(
  as.data.frame(seurat_campbell_batch_edgeR_chow_biomarkers_ROC),
  file = "seurat_campbell_batch_edgeR_chow_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_batch_edgeR_chow_biomarkers_ROC.csv",
  quote = FALSE
)

## Heatmap of the top 10 markers per cluster (taken from the default test,
## not from the ROC results)
DoHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)

Save seurat object of chow mice.

## Serialise the final chow Seurat object to an .rds file
saveRDS(
  seurat_campbell_batch_edgeR_chow,
  file = "./seurat_campbell_batch_edgeR_chow_final.rds"
)

Analysis of Campbell scRNAseq data for fasted mice (3 replicates) using seurat

Load FASTED mouse data

## Read the previously created Seurat object for the fasted mice
seurat_campbell_fasted <- readRDS(
  file = "./seurat_campbell_fasted_just_created.rds"
)

## Print a summary of the object
seurat_campbell_fasted
## An object of class seurat in project CAMPBELL_FASTED 
##  21789 genes across 3783 samples.

Quality control on FASTED mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the gene IDs as character strings. In
## R < 4.0 read.csv() returns them as a factor by default, and indexing a
## matrix with a factor uses the factor's integer codes, i.e. it silently
## selects rows 1..k of the matrix instead of the rows named by the gene IDs.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)


## Vector of mitochondrial gene IDs, explicitly coerced to character so that
## all later subsetting is by row name.
mito_genes <- as.character(mito_genes$id_with_url)


## Keep only the mitochondrial gene IDs that are present as row names of the
## raw count matrix. Matching on row names replaces the earlier
## grepl("NA", ...) test.
mito_genes_present <- mito_genes[
  mito_genes %in% rownames(seurat_campbell_fasted@raw.data)
]

## Fail loudly if nothing matched, rather than silently computing a
## mitochondrial fraction of zero for every cell.
## NOTE(review): assumes the id_with_url column holds bare gene IDs in the same
## format as the row names of raw.data -- confirm against the CSV.
stopifnot(length(mito_genes_present) > 0)

## Raw counts of the matched mitochondrial genes in the first five cells.
## NOTE(review): percent_mito values and any QC threshold chosen from them
## should be re-checked after this fix.
seurat_campbell_fasted@raw.data[mito_genes_present, 1:5]
##                    SRR5164441_AAAAAGGGATGC SRR5164441_AAAAATTCGGGC
## ENSMUSG00000000028                       0                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000159                       1                       3
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000223                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000142                       0                       0
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000085                       0                       0
## ENSMUSG00000000202                       0                       0
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000171                       0                       0
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000214                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000197                       0                       1
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000078                       0                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000247                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000244                       0                       0
## ENSMUSG00000000088                       0                       1
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164441_AAAACGGAAACT SRR5164441_AAAACTACAACT
## ENSMUSG00000000028                       0                       0
## ENSMUSG00000000037                       0                       0
## ENSMUSG00000000159                      14                       0
## ENSMUSG00000000149                       0                       0
## ENSMUSG00000000223                       0                       0
## ENSMUSG00000000167                       0                       0
## ENSMUSG00000000142                       0                       2
## ENSMUSG00000000127                       0                       0
## ENSMUSG00000000085                       0                       0
## ENSMUSG00000000202                       0                       0
## ENSMUSG00000000093                       0                       0
## ENSMUSG00000000171                       0                       0
## ENSMUSG00000000120                       0                       0
## ENSMUSG00000000134                       0                       0
## ENSMUSG00000000125                       0                       0
## ENSMUSG00000000214                       0                       0
## ENSMUSG00000000058                       0                       0
## ENSMUSG00000000168                       0                       0
## ENSMUSG00000000184                       0                       0
## ENSMUSG00000000056                       0                       0
## ENSMUSG00000000049                       0                       0
## ENSMUSG00000000197                       1                       0
## ENSMUSG00000000148                       0                       0
## ENSMUSG00000000078                       0                       0
## ENSMUSG00000000001                       0                       0
## ENSMUSG00000000247                       0                       0
## ENSMUSG00000000131                       0                       0
## ENSMUSG00000000194                       0                       0
## ENSMUSG00000000244                       0                       0
## ENSMUSG00000000088                       0                       0
## ENSMUSG00000000126                       0                       0
## ENSMUSG00000000031                       0                       0
##                    SRR5164441_AAAACTGGTTAT
## ENSMUSG00000000028                       0
## ENSMUSG00000000037                       0
## ENSMUSG00000000159                       0
## ENSMUSG00000000149                       0
## ENSMUSG00000000223                       0
## ENSMUSG00000000167                       0
## ENSMUSG00000000142                       0
## ENSMUSG00000000127                       0
## ENSMUSG00000000085                       0
## ENSMUSG00000000202                       0
## ENSMUSG00000000093                       0
## ENSMUSG00000000171                       0
## ENSMUSG00000000120                       0
## ENSMUSG00000000134                       0
## ENSMUSG00000000125                       0
## ENSMUSG00000000214                       0
## ENSMUSG00000000058                       0
## ENSMUSG00000000168                       0
## ENSMUSG00000000184                       0
## ENSMUSG00000000056                       0
## ENSMUSG00000000049                       0
## ENSMUSG00000000197                       0
## ENSMUSG00000000148                       0
## ENSMUSG00000000078                       0
## ENSMUSG00000000001                       0
## ENSMUSG00000000247                       0
## ENSMUSG00000000131                       0
## ENSMUSG00000000194                       0
## ENSMUSG00000000244                       0
## ENSMUSG00000000088                       0
## ENSMUSG00000000126                       0
## ENSMUSG00000000031                       0
## Dimensions (genes x cells) of the mitochondrial subset of the raw counts
dim(
  seurat_campbell_fasted@raw.data[mito_genes_present, ]
)
## [1]   32 3783
## Per-cell fraction of raw counts that come from the selected mitochondrial
## genes. Despite the name, this is a proportion (0-1), not a percentage.
mito_counts_per_cell <- Matrix::colSums(
  seurat_campbell_fasted@raw.data[mito_genes_present, ]
)
total_counts_per_cell <- Matrix::colSums(seurat_campbell_fasted@raw.data)
percent_mito <- mito_counts_per_cell / total_counts_per_cell


## Summary statistics of the mitochondrial fraction across cells
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0007047 0.0013918 0.0016627 0.0023381 0.0120724
## Store the per-cell mitochondrial fraction in the object's metadata
## under the column name "percent_mito"
seurat_campbell_fasted <- AddMetaData(
  object = seurat_campbell_fasted,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## Inspect the first rows of the metadata table
head(seurat_campbell_fasted@meta.data)
##                         nGene nUMI orig.ident replicate_name percent_mito
## SRR5164441_AAAAAGGGATGC  1001 1632 SRR5164441     SRR5164441 0.0006127451
## SRR5164441_AAAAATTCGGGC  1789 3300 SRR5164441     SRR5164441 0.0015165302
## SRR5164441_AAAACGGAAACT  1833 3370 SRR5164441     SRR5164441 0.0044589774
## SRR5164441_AAAACTACAACT  1653 2753 SRR5164441     SRR5164441 0.0007278020
## SRR5164441_AAAACTGGTTAT   834 1639 SRR5164441     SRR5164441 0.0000000000
## SRR5164441_AAAACTTCTACA  1334 2115 SRR5164441     SRR5164441 0.0033112583
## Violin plots of genes per cell, UMIs per cell and mitochondrial fraction
VlnPlot(
  object = seurat_campbell_fasted,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)

## Scatter plots of nUMI against the mitochondrial fraction and against nGene,
## laid out side by side (1 row, 2 columns)
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_fasted, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_fasted, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_fasted,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)

GenePlot(
  object = seurat_campbell_fasted,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)

Filter cells after QC

Filter out cells with more than 0.5% of total gene expression coming from mitochondrially encoded genes, and cells with more than 4000 genes expressed.

## Manual check of how many cells pass both upper thresholds
## (all cells are already known to have >800 genes)
table(
  with(
    seurat_campbell_fasted@meta.data,
    percent_mito < 0.005 & nGene < 4000
  )
)
## 
## FALSE  TRUE 
##   143  3640
# FALSE  TRUE 
#  143  3640 
 
## Keep cells with nGene between 800 and 4000 and percent_mito below 0.005;
## thresholds are given in the same order as subset.names
seurat_campbell_fasted <- FilterCells(
  object = seurat_campbell_fasted,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(4000, 0.005)
)
 
## Print the object summary after filtering
seurat_campbell_fasted
## An object of class seurat in project CAMPBELL_FASTED 
##  21789 genes across 3640 samples.
# An object of class seurat in project CAMPBELL_FASTED 
# 21789 genes across 3640 samples.

Log normalise gene expression per cell

## Histogram of total expression per cell before normalisation.
## Matrix::colSums() is used for both histograms so the column sums are
## computed the same way each time, without converting the whole expression
## matrix to a dense data frame first.
hist(Matrix::colSums(seurat_campbell_fasted@data),
     breaks = 100,
     main = "Total expression before normalisation",
     xlab = "Sum of expression")

## Normalise gene expression per cell (log-normalisation, scale factor 10000)
seurat_campbell_fasted <- NormalizeData(object = seurat_campbell_fasted,
                                        normalization.method = "LogNormalize",
                                        scale.factor = 10000)

## Histogram of total expression per cell after normalisation
hist(Matrix::colSums(seurat_campbell_fasted@data),
     breaks = 100,
     main = "Total expression after normalisation",
     xlab = "Sum of expression")

Find genes whose expression varies between cells.

Find genes whose expression varies between cells; these will be used to construct the principal components used for clustering.

## Select variable genes using mean-expression cutoffs (0.05 to 3) and a
## dispersion cutoff (0.75), with 20 equal-width bins
seurat_campbell_fasted <- FindVariableGenes(
  object = seurat_campbell_fasted,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75,
  num.bin = 20,
  binning.method = "equal_width"
)

# How many variable genes were selected
length(seurat_campbell_fasted@var.genes)
## [1] 1379

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1384 variable genes

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 2356 variable genes

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1848 variable genes

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 3006 variable genes

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )

                      == 2870 variable genes

seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 3006 variable genes

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI, percent_mito and mouse replicate.

## Scale the data while regressing out nUMI, percent_mito and replicate
seurat_campbell_fasted <- ScaleData(
  object = seurat_campbell_fasted,
  vars.to.regress = c("nUMI", "percent_mito", "replicate_name")
)
## Regressing out: nUMI, percent_mito, replicate_name
## 
## Time Elapsed:  29.1311025619507 secs
## Scaling data matrix

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA on the variable genes and print 5 genes for each of PCs 1 to 5
seurat_campbell_fasted <- RunPCA(
  object = seurat_campbell_fasted,
  pc.genes = seurat_campbell_fasted@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000026385" "ENSMUSG00000018593" "ENSMUSG00000031765"
## [4] "ENSMUSG00000050953" "ENSMUSG00000000567"
## [1] ""
## [1] "ENSMUSG00000027350" "ENSMUSG00000021700" "ENSMUSG00000026787"
## [4] "ENSMUSG00000066392" "ENSMUSG00000027523"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000041323"
## [4] "ENSMUSG00000110332" "ENSMUSG00000020473"
## [1] ""
## [1] "ENSMUSG00000027375" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000022584" "ENSMUSG00000024140"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000026830" "ENSMUSG00000036634"
## [4] "ENSMUSG00000076439" "ENSMUSG00000032517"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000075602" "ENSMUSG00000040584"
## [4] "ENSMUSG00000022584" "ENSMUSG00000041378"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000022528" "ENSMUSG00000063564" "ENSMUSG00000007682"
## [4] "ENSMUSG00000028195" "ENSMUSG00000045005"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000038370" "ENSMUSG00000027800"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000029838" "ENSMUSG00000030428" "ENSMUSG00000055254"
## [4] "ENSMUSG00000045092" "ENSMUSG00000056492"
## [1] ""
## [1] "ENSMUSG00000038642" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000024621" "ENSMUSG00000036887"
## [1] ""
## [1] ""
## Top genes associated with principal components 1 to 9
VizPCA(object = seurat_campbell_fasted, pcs.use = 1:9)

## Cells plotted on PC1 versus PC2
PCAPlot(
  object = seurat_campbell_fasted,
  dim.1 = 1,
  dim.2 = 2
)

## Cells plotted on PC2 versus PC3
PCAPlot(
  object = seurat_campbell_fasted,
  dim.1 = 2,
  dim.2 = 3
)

## Heatmap of gene expression for principal component 1 (500 cells)
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for principal component 2 (500 cells)
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for principal component 3 (500 cells)
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for principal component 4 (500 cells)
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmaps of gene expression for principal components 5 to 18 (500 cells each)
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 5:18,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)

## Project the PCA computed on the variable genes onto the entire dataset
## (all genes), printing the result
seurat_campbell_fasted <- ProjectPCA(
  object = seurat_campbell_fasted,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000026385" "ENSMUSG00000002985" "ENSMUSG00000018593"
##  [4] "ENSMUSG00000031765" "ENSMUSG00000050953" "ENSMUSG00000000567"
##  [7] "ENSMUSG00000031762" "ENSMUSG00000026728" "ENSMUSG00000058135"
## [10] "ENSMUSG00000034467" "ENSMUSG00000070306" "ENSMUSG00000043164"
## [13] "ENSMUSG00000001025" "ENSMUSG00000035805" "ENSMUSG00000005360"
## [16] "ENSMUSG00000030342" "ENSMUSG00000094800" "ENSMUSG00000095538"
## [19] "ENSMUSG00000041323" "ENSMUSG00000017009" "ENSMUSG00000033208"
## [22] "ENSMUSG00000027800" "ENSMUSG00000019232" "ENSMUSG00000026649"
## [25] "ENSMUSG00000110332" "ENSMUSG00000026701" "ENSMUSG00000053931"
## [28] "ENSMUSG00000020473" "ENSMUSG00000024411" "ENSMUSG00000008540"
## [1] ""
##  [1] "ENSMUSG00000021268" "ENSMUSG00000044349" "ENSMUSG00000019986"
##  [4] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000040785"
##  [7] "ENSMUSG00000055430" "ENSMUSG00000021087" "ENSMUSG00000026576"
## [10] "ENSMUSG00000043388" "ENSMUSG00000024268" "ENSMUSG00000064341"
## [13] "ENSMUSG00000027581" "ENSMUSG00000019923" "ENSMUSG00000047261"
## [16] "ENSMUSG00000027273" "ENSMUSG00000042750" "ENSMUSG00000002265"
## [19] "ENSMUSG00000107169" "ENSMUSG00000029223" "ENSMUSG00000027350"
## [22] "ENSMUSG00000033981" "ENSMUSG00000043384" "ENSMUSG00000025468"
## [25] "ENSMUSG00000024261" "ENSMUSG00000027500" "ENSMUSG00000024423"
## [28] "ENSMUSG00000060188" "ENSMUSG00000000159" "ENSMUSG00000025579"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000070306" "ENSMUSG00000021268" "ENSMUSG00000026223"
##  [4] "ENSMUSG00000043164" "ENSMUSG00000019986" "ENSMUSG00000040785"
##  [7] "ENSMUSG00000021270" "ENSMUSG00000055430" "ENSMUSG00000021087"
## [10] "ENSMUSG00000041323" "ENSMUSG00000110332" "ENSMUSG00000020473"
## [13] "ENSMUSG00000044772" "ENSMUSG00000044349" "ENSMUSG00000031428"
## [16] "ENSMUSG00000022037" "ENSMUSG00000027744" "ENSMUSG00000043384"
## [19] "ENSMUSG00000024033" "ENSMUSG00000036438" "ENSMUSG00000032595"
## [22] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000042750"
## [25] "ENSMUSG00000094800" "ENSMUSG00000034467" "ENSMUSG00000009281"
## [28] "ENSMUSG00000072674" "ENSMUSG00000026576" "ENSMUSG00000047394"
## [1] ""
##  [1] "ENSMUSG00000027375" "ENSMUSG00000079018" "ENSMUSG00000075602"
##  [4] "ENSMUSG00000022584" "ENSMUSG00000024140" "ENSMUSG00000040584"
##  [7] "ENSMUSG00000041378" "ENSMUSG00000030237" "ENSMUSG00000056492"
## [10] "ENSMUSG00000030235" "ENSMUSG00000020154" "ENSMUSG00000029648"
## [13] "ENSMUSG00000033960" "ENSMUSG00000039167" "ENSMUSG00000027199"
## [16] "ENSMUSG00000020717" "ENSMUSG00000037625" "ENSMUSG00000022548"
## [19] "ENSMUSG00000026193" "ENSMUSG00000064373" "ENSMUSG00000026830"
## [22] "ENSMUSG00000027562" "ENSMUSG00000031425" "ENSMUSG00000036634"
## [25] "ENSMUSG00000076439" "ENSMUSG00000032517" "ENSMUSG00000020077"
## [28] "ENSMUSG00000031775" "ENSMUSG00000001946" "ENSMUSG00000015090"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000037625" "ENSMUSG00000026830" "ENSMUSG00000036634"
##  [4] "ENSMUSG00000076439" "ENSMUSG00000031425" "ENSMUSG00000032517"
##  [7] "ENSMUSG00000050121" "ENSMUSG00000015090" "ENSMUSG00000032060"
## [10] "ENSMUSG00000006782" "ENSMUSG00000027562" "ENSMUSG00000090639"
## [13] "ENSMUSG00000041607" "ENSMUSG00000032854" "ENSMUSG00000022425"
## [16] "ENSMUSG00000020774" "ENSMUSG00000070354" "ENSMUSG00000022548"
## [19] "ENSMUSG00000033579" "ENSMUSG00000046160" "ENSMUSG00000040759"
## [22] "ENSMUSG00000027858" "ENSMUSG00000090996" "ENSMUSG00000073680"
## [25] "ENSMUSG00000047976" "ENSMUSG00000013523" "ENSMUSG00000020486"
## [28] "ENSMUSG00000027199" "ENSMUSG00000037166" "ENSMUSG00000039904"
## [1] ""
##  [1] "ENSMUSG00000079018" "ENSMUSG00000075602" "ENSMUSG00000040584"
##  [4] "ENSMUSG00000022584" "ENSMUSG00000041378" "ENSMUSG00000056492"
##  [7] "ENSMUSG00000030237" "ENSMUSG00000020154" "ENSMUSG00000030235"
## [10] "ENSMUSG00000029648" "ENSMUSG00000039167" "ENSMUSG00000020717"
## [13] "ENSMUSG00000026193" "ENSMUSG00000033960" "ENSMUSG00000001946"
## [16] "ENSMUSG00000024140" "ENSMUSG00000034738" "ENSMUSG00000042745"
## [19] "ENSMUSG00000031239" "ENSMUSG00000020077" "ENSMUSG00000032232"
## [22] "ENSMUSG00000029802" "ENSMUSG00000019966" "ENSMUSG00000006386"
## [25] "ENSMUSG00000039349" "ENSMUSG00000036256" "ENSMUSG00000029484"
## [28] "ENSMUSG00000000530" "ENSMUSG00000026814" "ENSMUSG00000026921"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000022528" "ENSMUSG00000063564" "ENSMUSG00000007682"
##  [4] "ENSMUSG00000028195" "ENSMUSG00000045005" "ENSMUSG00000017390"
##  [7] "ENSMUSG00000047786" "ENSMUSG00000034640" "ENSMUSG00000029838"
## [10] "ENSMUSG00000022132" "ENSMUSG00000026701" "ENSMUSG00000056380"
## [13] "ENSMUSG00000021250" "ENSMUSG00000018451" "ENSMUSG00000035686"
## [16] "ENSMUSG00000027004" "ENSMUSG00000021732" "ENSMUSG00000033737"
## [19] "ENSMUSG00000044177" "ENSMUSG00000022419" "ENSMUSG00000027447"
## [22] "ENSMUSG00000030428" "ENSMUSG00000000247" "ENSMUSG00000045817"
## [25] "ENSMUSG00000000567" "ENSMUSG00000003545" "ENSMUSG00000049929"
## [28] "ENSMUSG00000005089" "ENSMUSG00000030905" "ENSMUSG00000058897"
## [1] ""
##  [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
##  [4] "ENSMUSG00000038370" "ENSMUSG00000027800" "ENSMUSG00000044772"
##  [7] "ENSMUSG00000032595" "ENSMUSG00000072674" "ENSMUSG00000027744"
## [10] "ENSMUSG00000020473" "ENSMUSG00000110332" "ENSMUSG00000021270"
## [13] "ENSMUSG00000041323" "ENSMUSG00000026683" "ENSMUSG00000047139"
## [16] "ENSMUSG00000108841" "ENSMUSG00000021950" "ENSMUSG00000095304"
## [19] "ENSMUSG00000045655" "ENSMUSG00000044475" "ENSMUSG00000026301"
## [22] "ENSMUSG00000072473" "ENSMUSG00000022449" "ENSMUSG00000046242"
## [25] "ENSMUSG00000096054" "ENSMUSG00000047394" "ENSMUSG00000033208"
## [28] "ENSMUSG00000045954" "ENSMUSG00000091345" "ENSMUSG00000037926"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000018451" "ENSMUSG00000029838" "ENSMUSG00000101111"
##  [4] "ENSMUSG00000068923" "ENSMUSG00000064341" "ENSMUSG00000065947"
##  [7] "ENSMUSG00000040785" "ENSMUSG00000064370" "ENSMUSG00000031633"
## [10] "ENSMUSG00000020315" "ENSMUSG00000064356" "ENSMUSG00000064367"
## [13] "ENSMUSG00000030654" "ENSMUSG00000022892" "ENSMUSG00000021087"
## [16] "ENSMUSG00000055430" "ENSMUSG00000100862" "ENSMUSG00000030428"
## [19] "ENSMUSG00000055254" "ENSMUSG00000079037" "ENSMUSG00000043384"
## [22] "ENSMUSG00000052727" "ENSMUSG00000015222" "ENSMUSG00000045092"
## [25] "ENSMUSG00000056492" "ENSMUSG00000005125" "ENSMUSG00000032766"
## [28] "ENSMUSG00000034723" "ENSMUSG00000017390" "ENSMUSG00000063564"
## [1] ""
##  [1] "ENSMUSG00000038642" "ENSMUSG00000036905" "ENSMUSG00000036896"
##  [4] "ENSMUSG00000024621" "ENSMUSG00000036887" "ENSMUSG00000030579"
##  [7] "ENSMUSG00000058715" "ENSMUSG00000021423" "ENSMUSG00000028581"
## [10] "ENSMUSG00000023992" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [13] "ENSMUSG00000000982" "ENSMUSG00000089929" "ENSMUSG00000015852"
## [16] "ENSMUSG00000021665" "ENSMUSG00000024397" "ENSMUSG00000018008"
## [19] "ENSMUSG00000018930" "ENSMUSG00000026395" "ENSMUSG00000040552"
## [22] "ENSMUSG00000074622" "ENSMUSG00000069516" "ENSMUSG00000024401"
## [25] "ENSMUSG00000040229" "ENSMUSG00000022952" "ENSMUSG00000059498"
## [28] "ENSMUSG00000032691" "ENSMUSG00000030786" "ENSMUSG00000048163"
## [1] ""
## [1] ""

Determine statistically significant principal components

## Run the JackStraw test with 100 replicates (progress bar shown) and store
## the result back on the Seurat object. The result is plotted below to judge
## which principal components are statistically significant.
seurat_campbell_fasted <- JackStraw(
  object = seurat_campbell_fasted,
  display.progress = TRUE,
  num.replicate = 100
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  2.14932174285253 mins
# Maximum number of PCs allowed = 20.


## Draw the JackStraw plot for principal components 1 to 20.
JackStrawPlot(
  seurat_campbell_fasted,
  PCs = 1:20
)
## Warning: Removed 19616 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_FASTED 
##  21789 genes across 3640 samples.
## An elbow plot is a less computationally intensive heuristic for choosing
## the statistically significant principal components (PCs).
PCElbowPlot(seurat_campbell_fasted)

Cell clustering

## Cluster the cells on PCA dimensions 1-20 at resolution 0.6. The SNN graph
## is saved on the object and recalculation is forced.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation.
PrintFindClustersParams(seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCA dimensions 1-20.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)

## Plot the embedding with cluster labels drawn on the plot and no legend.
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)

## 1379 variable genes = 16 clusters
## 1379 variable genes, 10 PC = 13 clusters
## 3006 variable genes, 20 PC = 16 clusters

## Number of cells assigned to each cluster.
table(seurat_campbell_fasted@ident)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 1091  451  376  335  252  242  188  184  134  132   64   47   40   38   33 
##   15 
##   33
## Cross-tabulate cluster identity (rows) against replicate (columns), convert
## the counts to within-replicate proportions, and round to 5 decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_fasted@ident,
      seurat_campbell_fasted@meta.data$replicate_name
    ),
    margin = 2
  ),
  digits = 5
)

proportion_table
##     
##      SRR5164441 SRR5164444 SRR5164446
##   0     0.33306    0.21617    0.25075
##   1     0.09480    0.18629    0.17568
##   2     0.08940    0.10896    0.14865
##   3     0.11185    0.06854    0.04054
##   4     0.07401    0.06151    0.05856
##   5     0.07069    0.10018    0.02252
##   6     0.04865    0.08436    0.03453
##   7     0.04200    0.08084    0.05556
##   8     0.03035    0.01406    0.07958
##   9     0.03493    0.04042    0.03754
##   10    0.02412    0.00000    0.00901
##   11    0.01331    0.02109    0.00450
##   12    0.00457    0.00527    0.03904
##   13    0.01414    0.00703    0.00000
##   14    0.01081    0.00527    0.00601
##   15    0.00333    0.00000    0.03754
## t-SNE plot grouped by replicate, with a legend and without cluster labels.
TSNEPlot(
  seurat_campbell_fasted,
  pt.size = 0.4,
  do.label = FALSE,
  no.legend = FALSE,
  group.by = "replicate_name"
)

## Convert the proportion table into a plain data frame with one column per
## replicate. Both arguments are evaluated while proportion_table is still the
## table, so the replicate names come straight from its column names.
proportion_table <- stats::setNames(
  data.frame(matrix(proportion_table, ncol = ncol(proportion_table))),
  colnames(proportion_table)
)

## Correlation chart between replicates, using only the first five rows of
## the proportion table.
chart.Correlation(data.frame(proportion_table[1:5, ]))

#######
## Repeat the clustering on PCA dimensions 1-20 at resolution 1.0.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation.
## NOTE(review): in the rendered report the output after this call still shows
## "Resolution: 0.6" with the timestamp of the first run -- confirm that the
## parameters for resolution 1.0 are really being reported
## (PrintFindClustersParams has a `resolution` argument for selecting a run).
PrintFindClustersParams(seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCA dimensions 1-20.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)

## Plot the embedding with cluster labels drawn on the plot and no legend.
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)

## 1379 variable genes = 19 clusters
## 1379 variable genes, 10 PC = 15 clusters
## 3006 variable genes, 20 PC = 18 clusters

## Number of cells assigned to each cluster.
table(seurat_campbell_fasted@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 572 493 450 337 276 270 249 184 177 134 106  98  64  47  43  38  35  34 
##  18 
##  33
## Cross-tabulate cluster identity (rows) against replicate (columns), convert
## the counts to within-replicate proportions, and round to 5 decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_fasted@ident,
      seurat_campbell_fasted@meta.data$replicate_name
    ),
    margin = 2
  ),
  digits = 5
)

proportion_table
##     
##      SRR5164441 SRR5164444 SRR5164446
##   0     0.16674    0.10018    0.17117
##   1     0.15967    0.11072    0.06907
##   2     0.09439    0.18629    0.17568
##   3     0.11185    0.06854    0.04354
##   4     0.05904    0.10545    0.11111
##   5     0.07900    0.06503    0.06456
##   6     0.07277    0.10193    0.02402
##   7     0.04699    0.08436    0.03453
##   8     0.03992    0.07733    0.05556
##   9     0.03035    0.01406    0.07958
##   10    0.03243    0.00703    0.03604
##   11    0.02495    0.02636    0.03453
##   12    0.02412    0.00000    0.00901
##   13    0.01331    0.02109    0.00450
##   14    0.00582    0.00527    0.03904
##   15    0.01414    0.00703    0.00000
##   16    0.00998    0.01406    0.00450
##   17    0.01123    0.00527    0.00601
##   18    0.00333    0.00000    0.03754
## t-SNE plot grouped by replicate, with a legend and without cluster labels.
TSNEPlot(
  seurat_campbell_fasted,
  pt.size = 0.4,
  do.label = FALSE,
  no.legend = FALSE,
  group.by = "replicate_name"
)

## Convert the proportion table into a plain data frame with one column per
## replicate. Both arguments are evaluated while proportion_table is still the
## table, so the replicate names come straight from its column names.
proportion_table <- stats::setNames(
  data.frame(matrix(proportion_table, ncol = ncol(proportion_table))),
  colnames(proportion_table)
)

## Correlation chart between replicates, using only the first five rows of
## the proportion table.
chart.Correlation(data.frame(proportion_table[1:5, ]))

#######
## Repeat the clustering on PCA dimensions 1-20 at resolution 1.5.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation.
## NOTE(review): in the rendered report the output after this call still shows
## "Resolution: 0.6" with the timestamp of the first run -- confirm that the
## parameters for resolution 1.5 are really being reported
## (PrintFindClustersParams has a `resolution` argument for selecting a run).
PrintFindClustersParams(seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCA dimensions 1-20.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)

## Plot the embedding with cluster labels drawn on the plot and no legend.
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)

## 1379 variable genes = 20 clusters
## 1379 variable genes, 10 PC = 19 clusters
## 3006 variable genes, 20 PC = 20 clusters

## Number of cells assigned to each cluster.
table(seurat_campbell_fasted@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 490 464 449 337 271 269 252 189 184 134 106 102  99  64  47  43  38  35 
##  18  19 
##  34  33
## Cross-tabulate cluster identity (rows) against replicate (columns), convert
## the counts to within-replicate proportions, and round to 5 decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_fasted@ident,
      seurat_campbell_fasted@meta.data$replicate_name
    ),
    margin = 2
  ),
  digits = 5
)

proportion_table
##     
##      SRR5164441 SRR5164444 SRR5164446
##   0     0.15925    0.10721    0.06907
##   1     0.15551    0.07557    0.07057
##   2     0.09439    0.18453    0.17568
##   3     0.11185    0.06854    0.04354
##   4     0.07942    0.06503    0.06456
##   5     0.05696    0.10193    0.11111
##   6     0.07401    0.10193    0.02402
##   7     0.04823    0.08612    0.03604
##   8     0.04200    0.08084    0.05556
##   9     0.03035    0.01406    0.07958
##   10    0.03243    0.00703    0.03604
##   11    0.00832    0.02812    0.09910
##   12    0.02536    0.02636    0.03453
##   13    0.02412    0.00000    0.00901
##   14    0.01331    0.02109    0.00450
##   15    0.00582    0.00527    0.03904
##   16    0.01414    0.00703    0.00000
##   17    0.00998    0.01406    0.00450
##   18    0.01123    0.00527    0.00601
##   19    0.00333    0.00000    0.03754
## t-SNE plot grouped by replicate, with a legend and without cluster labels.
TSNEPlot(
  seurat_campbell_fasted,
  pt.size = 0.4,
  do.label = FALSE,
  no.legend = FALSE,
  group.by = "replicate_name"
)

## Convert the proportion table into a plain data frame with one column per
## replicate. Both arguments are evaluated while proportion_table is still the
## table, so the replicate names come straight from its column names.
proportion_table <- stats::setNames(
  data.frame(matrix(proportion_table, ncol = ncol(proportion_table))),
  colnames(proportion_table)
)

## Correlation chart between replicates, using only the first five rows of
## the proportion table.
chart.Correlation(data.frame(proportion_table[1:5, ]))

#######
## Repeat the clustering on PCA dimensions 1-20 at resolution 2.0.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters calculation.
## NOTE(review): in the rendered report the output after this call still shows
## "Resolution: 0.6" with the timestamp of the first run -- confirm that the
## parameters for resolution 2.0 are really being reported
## (PrintFindClustersParams has a `resolution` argument for selecting a run).
PrintFindClustersParams(seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCA dimensions 1-20.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)

## Plot the embedding with cluster labels drawn on the plot and no legend.
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)

## 1379 variable genes = 23 clusters
## 1379 variable genes, 10 PC = 21 clusters
## 3006 variable genes, 20 PC = 21 clusters

## Number of cells assigned to each cluster.
table(seurat_campbell_fasted@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 476 431 258 253 251 249 232 219 189 177 134 125 119  97  94  64  47  45 
##  18  19  20  21  22 
##  40  38  35  34  33
## Cross-tabulate cluster identity (rows) against replicate (columns), convert
## the counts to within-replicate proportions, and round to 5 decimal places.
proportion_table <- round(
  prop.table(
    table(
      seurat_campbell_fasted@ident,
      seurat_campbell_fasted@meta.data$replicate_name
    ),
    margin = 2
  ),
  digits = 5
)

proportion_table
##     
##      SRR5164441 SRR5164444 SRR5164446
##   0     0.15593    0.10369    0.06306
##   1     0.14761    0.06327    0.06006
##   2     0.05447    0.10193    0.10360
##   3     0.07443    0.10193    0.02402
##   4     0.07110    0.06678    0.06306
##   5     0.08690    0.03339    0.03153
##   6     0.08690    0.02460    0.01351
##   7     0.00748    0.16169    0.16366
##   8     0.04823    0.08612    0.03604
##   9     0.03992    0.07733    0.05556
##   10    0.03035    0.01406    0.07958
##   11    0.03701    0.01054    0.04505
##   12    0.01247    0.03339    0.10511
##   13    0.02495    0.02636    0.03303
##   14    0.02661    0.03691    0.01351
##   15    0.02412    0.00000    0.00901
##   16    0.01331    0.02109    0.00450
##   17    0.00624    0.00527    0.04054
##   18    0.01331    0.00527    0.00751
##   19    0.01414    0.00703    0.00000
##   20    0.00998    0.01406    0.00450
##   21    0.01123    0.00527    0.00601
##   22    0.00333    0.00000    0.03754
## t-SNE coloured by replicate, to see how replicates mix across clusters
TSNEPlot(
  object = seurat_campbell_fasted,
  group.by = "replicate_name",
  pt.size = 0.4,
  do.label = FALSE,
  no.legend = FALSE
)

## Convert the proportion table to a plain data frame (one column per
## replicate); this drops the dimnames, so the replicate names are restored
## from a fresh cluster-by-replicate table.
proportion_table <- data.frame(
  matrix(proportion_table, ncol = ncol(proportion_table))
)

colnames(proportion_table) <- colnames(
  table(seurat_campbell_fasted@ident,
        seurat_campbell_fasted@meta.data$replicate_name)
)

## Pairwise correlation chart between replicates, using the first 5 rows only
chart.Correlation(data.frame(proportion_table[1:5, ]))

# cor(proportion_table[1],proportion_table[4])

## Final clustering: PCA dimensions 1:20 at resolution 0.6.
## NOTE(review): the original note here cites 1817 variable genes, whereas the
## accompanying text cites 1379 -- confirm which gene set was used.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  force.recalc = TRUE,
  save.SNN = TRUE,
  print.output = 0
)


## t-SNE embedding for the final clustering
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)

Use 1379 variable genes and 20 principal components with a resolution of 0.6. This gives a total of 16 clusters.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

## Differential expression of each cluster against all remaining cells.
## only.pos = FALSE keeps both positive and negative markers.
seurat_campbell_fasted_biomarkers <- FindAllMarkers(
  object = seurat_campbell_fasted,
  min.pct = 0.2,
  only.pos = FALSE
)


## Per cluster, keep the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers <- seurat_campbell_fasted_biomarkers %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups:   cluster [16]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 5.87e-131     0.628 0.976 0.795 1.28e-126 0       ENSMUSG00000055430
##  2 5.43e- 86     0.787 0.7   0.37  1.18e- 81 0       ENSMUSG00000035864
##  3 3.11e- 55     0.645 0.423 0.179 6.77e- 51 0       ENSMUSG00000054459
##  4 7.29e- 54     0.613 0.602 0.353 1.59e- 49 0       ENSMUSG00000021700
##  5 1.01e- 53     0.605 0.611 0.364 2.20e- 49 0       ENSMUSG00000066392
##  6 4.19e- 51     0.698 0.384 0.16  9.12e- 47 0       ENSMUSG00000010803
##  7 4.83e- 51     0.836 0.555 0.341 1.05e- 46 0       ENSMUSG00000026787
##  8 6.40e- 50     0.676 0.313 0.113 1.39e- 45 0       ENSMUSG00000037771
##  9 2.58e- 49     0.630 0.511 0.276 5.62e- 45 0       ENSMUSG00000048978
## 10 4.72e- 41     0.770 0.239 0.082 1.03e- 36 0       ENSMUSG00000036357
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_fasted_biomarkers), file = "seurat_campbell_fasted_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_fasted_biomarkers.csv", quote = FALSE)




## Repeat the marker search using the ROC test (test.use = "roc").
## This can take a long time.
seurat_campbell_fasted_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_fasted,
  test.use = "roc",
  min.pct = 0.2,
  only.pos = FALSE
)

## Per cluster, keep the 10 ROC markers with the largest avg_logFC
top10_seurat_campbell_markers_ROC <- seurat_campbell_fasted_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 142 x 9
## # Groups:   cluster [16]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.754    0.628 0.508     0.628 0.976 0.795        NA 0       ENSMUSG00…
##  2 0.728    0.515 0.456     0.515 0.99  0.825        NA 0       ENSMUSG00…
##  3 0.994    4.12  0.988     4.12  0.991 0.105        NA 1       ENSMUSG00…
##  4 0.988    4.86  0.976     4.86  0.98  0.143        NA 1       ENSMUSG00…
##  5 0.845    0.995 0.69      0.995 0.976 0.761        NA 1       ENSMUSG00…
##  6 0.818    1.63  0.636     1.63  0.725 0.137        NA 1       ENSMUSG00…
##  7 0.772    1.66  0.544     1.66  0.561 0.023        NA 1       ENSMUSG00…
##  8 0.736    1.30  0.472     1.30  0.563 0.12         NA 1       ENSMUSG00…
##  9 0.732    1.35  0.464     1.35  0.499 0.041        NA 1       ENSMUSG00…
## 10 0.725    1.40  0.450     1.40  0.49  0.049        NA 1       ENSMUSG00…
## # ... with 132 more rows
# write.csv(as.data.frame(seurat_campbell_fasted_biomarkers_ROC), file = "seurat_campbell_fasted_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_fasted_biomarkers_ROC.csv", quote = FALSE)


## Heatmap of the top-10-per-cluster marker genes (default test)
DoHeatmap(
  object = seurat_campbell_fasted,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)

Save seurat object of fasted mice.

## save seurat object as .rds 
# saveRDS(seurat_campbell_fasted, file = "./seurat_campbell_fasted_final.rds")

Analysis of Campbell scRNAseq data for mice on a LOW FAT DIET (1 replicate) using Seurat

Load LFD mouse data

## Read the previously created LFD seurat object from disk
seurat_campbell_lfd <- readRDS(
  file = "./seurat_campbell_lfd_just_created.rds"
)

## Print a summary of the loaded object
seurat_campbell_lfd
## An object of class seurat in project CAMPBELL_LFD 
##  23273 genes across 3347 samples.

Quality control on LFD mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings: on R < 4.0
## read.csv() returns factors by default, and indexing with a factor selects
## rows by the factor's integer codes instead of by gene ID, which silently
## picks unrelated genes.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)


## Make a character vector of mitochondrial gene IDs.
## NOTE(review): assumes `id_with_url` holds plain gene IDs that match the row
## names of the count matrix -- confirm against the CSV.
mito_genes <- as.character(mito_genes$id_with_url)


## Identify which mitochondrially encoded gene IDs are present in my dataset
## by matching them against the row names of the raw count matrix
## (duplicates are dropped so no gene is counted twice).
mito_genes_present <- unique(
  mito_genes[mito_genes %in% rownames(seurat_campbell_lfd@raw.data)]
)

## Fail loudly rather than continue QC with an empty gene set.
if (length(mito_genes_present) == 0) {
  stop("No IDs from mito_genes_table.csv match row names of the raw data",
       call. = FALSE)
}

## Raw counts of the matched mitochondrial genes in the first 5 cells
seurat_campbell_lfd@raw.data[mito_genes_present, 1:5]
##                    AAAAAAGATACT AAAACGAGTACC AAAACTAAAACA AAAAGGTGGGTC
## ENSMUSG00000000028            0            0            0            0
## ENSMUSG00000000037            0            0            0            0
## ENSMUSG00000000149            0            0            0            0
## ENSMUSG00000000148            0            1            0            0
## ENSMUSG00000000202            0            0            0            0
## ENSMUSG00000000154            0            0            0            0
## ENSMUSG00000000134            0            0            0            0
## ENSMUSG00000000126            0            0            0            0
## ENSMUSG00000000085            0            0            0            0
## ENSMUSG00000000194            0            0            0            0
## ENSMUSG00000000093            0            0            0            0
## ENSMUSG00000000167            0            0            0            0
## ENSMUSG00000000094            0            0            0            0
## ENSMUSG00000000131            0            0            0            0
## ENSMUSG00000000120            0            0            0            0
## ENSMUSG00000000197            0            0            0            0
## ENSMUSG00000000058            0            0            0            0
## ENSMUSG00000000159            4           15            0            0
## ENSMUSG00000000168            0            0            0            0
## ENSMUSG00000000056            1            0            0            0
## ENSMUSG00000000049            0            0            0            0
## ENSMUSG00000000184            1            1            0            1
## ENSMUSG00000000142            0            0            0            0
## ENSMUSG00000000078            0            1            0            0
## ENSMUSG00000000001            0            0            0            1
## ENSMUSG00000000223            0            0            0            0
## ENSMUSG00000000127            0            0            0            0
## ENSMUSG00000000171            4            1            0            0
## ENSMUSG00000000214            0            0            0            0
## ENSMUSG00000000088            1            0            0            1
## ENSMUSG00000000125            0            0            0            0
## ENSMUSG00000000031            0            0            0            0
##                    AAAATCAGCTTC
## ENSMUSG00000000028            0
## ENSMUSG00000000037            0
## ENSMUSG00000000149            0
## ENSMUSG00000000148            0
## ENSMUSG00000000202            0
## ENSMUSG00000000154            0
## ENSMUSG00000000134            0
## ENSMUSG00000000126            0
## ENSMUSG00000000085            0
## ENSMUSG00000000194            0
## ENSMUSG00000000093            0
## ENSMUSG00000000167            0
## ENSMUSG00000000094            0
## ENSMUSG00000000131            0
## ENSMUSG00000000120            0
## ENSMUSG00000000197            0
## ENSMUSG00000000058            0
## ENSMUSG00000000159            4
## ENSMUSG00000000168            0
## ENSMUSG00000000056            0
## ENSMUSG00000000049            0
## ENSMUSG00000000184            0
## ENSMUSG00000000142            0
## ENSMUSG00000000078            0
## ENSMUSG00000000001            0
## ENSMUSG00000000223            0
## ENSMUSG00000000127            0
## ENSMUSG00000000171            1
## ENSMUSG00000000214            0
## ENSMUSG00000000088            0
## ENSMUSG00000000125            0
## ENSMUSG00000000031            0
## Dimensions (genes x cells) of the raw counts restricted to mito_genes_present
dim(seurat_campbell_lfd@raw.data[mito_genes_present, ])
## [1]   32 3347
## Per-cell share of raw counts coming from the selected mitochondrial genes.
## Despite the name this is a fraction in [0, 1], not a percentage
## (e.g. 0.004 corresponds to 0.4%).
percent_mito <-
  Matrix::colSums(seurat_campbell_lfd@raw.data[mito_genes_present, ]) /
  Matrix::colSums(seurat_campbell_lfd@raw.data)


## Summary statistics of the mitochondrial fraction across cells
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0007315 0.0012928 0.0015295 0.0020677 0.0076620
## Store the mitochondrial fraction in the object's metadata as "percent_mito"
seurat_campbell_lfd <- AddMetaData(
  object = seurat_campbell_lfd,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## Inspect the first rows of the metadata to confirm the new column
head(seurat_campbell_lfd@meta.data)
##              nGene nUMI   orig.ident replicate_name percent_mito
## AAAAAAGATACT  2806 5638 CAMPBELL_LFD     SRR5164439 0.0019517388
## AAAACGAGTACC  3030 7629 CAMPBELL_LFD     SRR5164439 0.0024924570
## AAAACTAAAACA   964 1996 CAMPBELL_LFD     SRR5164439 0.0000000000
## AAAAGGTGGGTC  2747 6120 CAMPBELL_LFD     SRR5164439 0.0004903563
## AAAATCAGCTTC  2886 5655 CAMPBELL_LFD     SRR5164439 0.0008849558
## AAAATGAGACGG  1236 1768 CAMPBELL_LFD     SRR5164439 0.0011312217
## Violin plots of the three QC metrics: genes per cell, UMIs per cell and
## mitochondrial fraction
VlnPlot(
  object = seurat_campbell_lfd,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  point.size.use = 0.2,
  x.lab.rot = TRUE
)

## Scatter plots of nUMI against the mitochondrial fraction and against nGene,
## drawn side by side (1 row x 2 columns).
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_lfd, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_lfd, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_lfd,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  cex.use = 0.5,
  pch.use = 16
)

GenePlot(
  object = seurat_campbell_lfd,
  gene1 = "nUMI",
  gene2 = "nGene",
  cex.use = 0.5,
  pch.use = 16
)

Filter cells after QC

Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 5000 genes expressed.

## Manual check of how many cells pass both upper thresholds
## (the lower nGene bound is not checked here; all cells are already known
## to have >800 genes).
with(
  seurat_campbell_lfd@meta.data,
  table(percent_mito < 0.004 & nGene < 5000)
)
## 
## FALSE  TRUE 
##   213  3134
# FALSE  TRUE 
#   213  3134 
 
## Apply the QC thresholds: nGene between 800 and 5000, and percent_mito up to
## 0.004 (0.4%) with no lower bound. Threshold vectors are positional and
## follow the order of subset.names.
seurat_campbell_lfd <- FilterCells(
  object = seurat_campbell_lfd,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(5000, 0.004)
)

## Print the object to see how many cells remain
seurat_campbell_lfd
## An object of class seurat in project CAMPBELL_LFD 
##  23273 genes across 3134 samples.
# An object of class seurat in project CAMPBELL_LFD 
# 23273 genes across 3134 samples.

Log normalise gene expression per cell

## Plot graph of total expression per cell before normalisation.
## Matrix::colSums is used for both histograms: it works directly on the
## expression matrix, so there is no need to convert it to a dense
## matrix/data frame first (presumably @data is a sparse Matrix -- confirm).
hist(Matrix::colSums(seurat_campbell_lfd@data),
     breaks = 100,
     main = "Total expression before normalisation",
     xlab = "Sum of expression")

## Normalise gene expression per cell (log-normalisation, scale factor 10000)
seurat_campbell_lfd <- NormalizeData(object = seurat_campbell_lfd,
                                     normalization.method = "LogNormalize",
                                     scale.factor = 10000)

## Plot graph of total expression per cell after normalisation
hist(Matrix::colSums(seurat_campbell_lfd@data),
     breaks = 100,
     main = "Total expression after normalisation",
     xlab = "Sum of expression")

Find genes whose expression varies between cells.

Find genes whose expression varies between cells; these will be used to construct the principal components on which cells are clustered.

## Select variable genes using the final cut-offs
## (x range 0.05-3, y cut-off 0.75, 20 equal-width bins)
seurat_campbell_lfd <- FindVariableGenes(
  object = seurat_campbell_lfd,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05, x.high.cutoff = 3,
  y.cutoff = 0.75,
  num.bin = 20, binning.method = "equal_width"
)

# how many genes were flagged as variable
length(seurat_campbell_lfd@var.genes)
## [1] 1480

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1484 variable genes

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 2420 variable genes

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1986 variable genes

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 3189 variable genes

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )

                      == 3122 variable genes

seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1480 variable genes
                      

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI and percent_mito (this dataset has a single replicate, so replicate is not regressed out).

## Scale the data, regressing out nUMI and percent_mito
seurat_campbell_lfd <- ScaleData(
  object = seurat_campbell_lfd,
  vars.to.regress = c("nUMI", "percent_mito")
)
## Regressing out: nUMI, percent_mito
## 
## Time Elapsed:  29.3174724578857 secs
## Scaling data matrix

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA on the variable genes, printing 5 genes for each of PCs 1-5
seurat_campbell_lfd <- RunPCA(
  object = seurat_campbell_lfd,
  pc.genes = seurat_campbell_lfd@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000050711" "ENSMUSG00000022577" "ENSMUSG00000043388"
## [4] "ENSMUSG00000036699" "ENSMUSG00000044349"
## [1] ""
## [1] "ENSMUSG00000031765" "ENSMUSG00000031762" "ENSMUSG00000026701"
## [4] "ENSMUSG00000001025" "ENSMUSG00000000567"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000037852" "ENSMUSG00000055254" "ENSMUSG00000026701"
## [4] "ENSMUSG00000017390" "ENSMUSG00000035805"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000041378" "ENSMUSG00000040584"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000058715" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000030579"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000032854"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000040584" "ENSMUSG00000041378"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036905" "ENSMUSG00000036887"
## [4] "ENSMUSG00000058715" "ENSMUSG00000024621"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000026830" "ENSMUSG00000050121" "ENSMUSG00000015090"
## [4] "ENSMUSG00000076439" "ENSMUSG00000027375"
## [1] ""
## [1] "ENSMUSG00000031610" "ENSMUSG00000026249" "ENSMUSG00000026424"
## [4] "ENSMUSG00000029231" "ENSMUSG00000019874"
## [1] ""
## [1] ""
## Top genes associated with principal components 1 to 9
VizPCA(
  object = seurat_campbell_lfd,
  pcs.use = 1:9
)

## Cells plotted on principal component 1 vs 2
PCAPlot(
  object = seurat_campbell_lfd,
  dim.1 = 1,
  dim.2 = 2
)

## Cells plotted on principal component 2 vs 3
PCAPlot(
  object = seurat_campbell_lfd,
  dim.1 = 2,
  dim.2 = 3
)

## Heat map of gene expression for principal component 1 genes
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 2 genes
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 3 genes
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for principal component 4 genes
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat maps of gene expression for principal components 5 to 18
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 5:18,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)

## Project the PCA computed on the variable genes onto the entire dataset
## (all genes), printing the result
seurat_campbell_lfd <- ProjectPCA(
  object = seurat_campbell_lfd,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000033061" "ENSMUSG00000047261" "ENSMUSG00000027581"
##  [4] "ENSMUSG00000050711" "ENSMUSG00000025468" "ENSMUSG00000024268"
##  [7] "ENSMUSG00000022577" "ENSMUSG00000043388" "ENSMUSG00000055430"
## [10] "ENSMUSG00000036699" "ENSMUSG00000021087" "ENSMUSG00000029223"
## [13] "ENSMUSG00000019986" "ENSMUSG00000044349" "ENSMUSG00000026576"
## [16] "ENSMUSG00000027350" "ENSMUSG00000027500" "ENSMUSG00000019923"
## [19] "ENSMUSG00000035964" "ENSMUSG00000059361" "ENSMUSG00000042750"
## [22] "ENSMUSG00000031840" "ENSMUSG00000018965" "ENSMUSG00000060188"
## [25] "ENSMUSG00000071658" "ENSMUSG00000027273" "ENSMUSG00000066705"
## [28] "ENSMUSG00000040785" "ENSMUSG00000022658" "ENSMUSG00000020297"
## [1] ""
##  [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
##  [4] "ENSMUSG00000031762" "ENSMUSG00000026701" "ENSMUSG00000001025"
##  [7] "ENSMUSG00000000567" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [10] "ENSMUSG00000035805" "ENSMUSG00000058135" "ENSMUSG00000022528"
## [13] "ENSMUSG00000029838" "ENSMUSG00000050953" "ENSMUSG00000018593"
## [16] "ENSMUSG00000005360" "ENSMUSG00000095538" "ENSMUSG00000055254"
## [19] "ENSMUSG00000027712" "ENSMUSG00000044080" "ENSMUSG00000018102"
## [22] "ENSMUSG00000008540" "ENSMUSG00000027447" "ENSMUSG00000032231"
## [25] "ENSMUSG00000036570" "ENSMUSG00000034467" "ENSMUSG00000063564"
## [28] "ENSMUSG00000031342" "ENSMUSG00000008575" "ENSMUSG00000017390"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000018451" "ENSMUSG00000037852" "ENSMUSG00000067786"
##  [4] "ENSMUSG00000026223" "ENSMUSG00000055254" "ENSMUSG00000101111"
##  [7] "ENSMUSG00000052727" "ENSMUSG00000039278" "ENSMUSG00000031760"
## [10] "ENSMUSG00000026701" "ENSMUSG00000058254" "ENSMUSG00000031428"
## [13] "ENSMUSG00000064370" "ENSMUSG00000017390" "ENSMUSG00000021270"
## [16] "ENSMUSG00000046432" "ENSMUSG00000116358" "ENSMUSG00000021268"
## [19] "ENSMUSG00000035805" "ENSMUSG00000021379" "ENSMUSG00000032324"
## [22] "ENSMUSG00000025666" "ENSMUSG00000001025" "ENSMUSG00000006373"
## [25] "ENSMUSG00000050071" "ENSMUSG00000079037" "ENSMUSG00000000567"
## [28] "ENSMUSG00000019505" "ENSMUSG00000044550" "ENSMUSG00000030647"
## [1] ""
##  [1] "ENSMUSG00000020077" "ENSMUSG00000079018" "ENSMUSG00000075602"
##  [4] "ENSMUSG00000041378" "ENSMUSG00000040584" "ENSMUSG00000036896"
##  [7] "ENSMUSG00000022584" "ENSMUSG00000016494" "ENSMUSG00000058715"
## [10] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000056492"
## [13] "ENSMUSG00000030235" "ENSMUSG00000028581" "ENSMUSG00000029484"
## [16] "ENSMUSG00000030237" "ENSMUSG00000024621" "ENSMUSG00000030579"
## [19] "ENSMUSG00000021423" "ENSMUSG00000020154" "ENSMUSG00000023992"
## [22] "ENSMUSG00000024397" "ENSMUSG00000033960" "ENSMUSG00000029622"
## [25] "ENSMUSG00000036256" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [28] "ENSMUSG00000015852" "ENSMUSG00000000530" "ENSMUSG00000060802"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000058715" "ENSMUSG00000036896" "ENSMUSG00000036887"
##  [4] "ENSMUSG00000036905" "ENSMUSG00000030579" "ENSMUSG00000028581"
##  [7] "ENSMUSG00000023992" "ENSMUSG00000024621" "ENSMUSG00000021423"
## [10] "ENSMUSG00000024397" "ENSMUSG00000015852" "ENSMUSG00000038642"
## [13] "ENSMUSG00000046805" "ENSMUSG00000048163" "ENSMUSG00000027447"
## [16] "ENSMUSG00000036353" "ENSMUSG00000000682" "ENSMUSG00000059498"
## [19] "ENSMUSG00000040229" "ENSMUSG00000030786" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040747" "ENSMUSG00000054675" "ENSMUSG00000030844"
## [25] "ENSMUSG00000018008" "ENSMUSG00000020377" "ENSMUSG00000026126"
## [28] "ENSMUSG00000044811" "ENSMUSG00000069516" "ENSMUSG00000018451"
## [1] ""
##  [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000076439"
##  [4] "ENSMUSG00000026830" "ENSMUSG00000032854" "ENSMUSG00000027375"
##  [7] "ENSMUSG00000036634" "ENSMUSG00000050121" "ENSMUSG00000032517"
## [10] "ENSMUSG00000031775" "ENSMUSG00000033579" "ENSMUSG00000032060"
## [13] "ENSMUSG00000027562" "ENSMUSG00000046160" "ENSMUSG00000073680"
## [16] "ENSMUSG00000022425" "ENSMUSG00000041607" "ENSMUSG00000037166"
## [19] "ENSMUSG00000006782" "ENSMUSG00000015090" "ENSMUSG00000027858"
## [22] "ENSMUSG00000020486" "ENSMUSG00000040759" "ENSMUSG00000013523"
## [25] "ENSMUSG00000027199" "ENSMUSG00000026888" "ENSMUSG00000022090"
## [28] "ENSMUSG00000043448" "ENSMUSG00000020774" "ENSMUSG00000090996"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000079018" "ENSMUSG00000040584" "ENSMUSG00000041378"
##  [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000030235"
##  [7] "ENSMUSG00000022584" "ENSMUSG00000030237" "ENSMUSG00000020154"
## [10] "ENSMUSG00000033960" "ENSMUSG00000001946" "ENSMUSG00000029648"
## [13] "ENSMUSG00000036256" "ENSMUSG00000031239" "ENSMUSG00000026193"
## [16] "ENSMUSG00000039167" "ENSMUSG00000025902" "ENSMUSG00000029086"
## [19] "ENSMUSG00000042116" "ENSMUSG00000028776" "ENSMUSG00000034738"
## [22] "ENSMUSG00000061353" "ENSMUSG00000039349" "ENSMUSG00000030413"
## [25] "ENSMUSG00000000530" "ENSMUSG00000006386" "ENSMUSG00000020717"
## [28] "ENSMUSG00000027435" "ENSMUSG00000009687" "ENSMUSG00000045954"
## [1] ""
##  [1] "ENSMUSG00000036896" "ENSMUSG00000036905" "ENSMUSG00000036887"
##  [4] "ENSMUSG00000058715" "ENSMUSG00000024621" "ENSMUSG00000028581"
##  [7] "ENSMUSG00000021423" "ENSMUSG00000023992" "ENSMUSG00000030579"
## [10] "ENSMUSG00000024397" "ENSMUSG00000038642" "ENSMUSG00000015852"
## [13] "ENSMUSG00000048163" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [16] "ENSMUSG00000090639" "ENSMUSG00000052160" "ENSMUSG00000059498"
## [19] "ENSMUSG00000030786" "ENSMUSG00000018008" "ENSMUSG00000000682"
## [22] "ENSMUSG00000040747" "ENSMUSG00000040229" "ENSMUSG00000054675"
## [25] "ENSMUSG00000036908" "ENSMUSG00000020377" "ENSMUSG00000098112"
## [28] "ENSMUSG00000026126" "ENSMUSG00000044811" "ENSMUSG00000070354"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000024661" "ENSMUSG00000026830" "ENSMUSG00000050121"
##  [4] "ENSMUSG00000015090" "ENSMUSG00000076439" "ENSMUSG00000027375"
##  [7] "ENSMUSG00000022548" "ENSMUSG00000027562" "ENSMUSG00000037625"
## [10] "ENSMUSG00000032517" "ENSMUSG00000036634" "ENSMUSG00000037166"
## [13] "ENSMUSG00000020774" "ENSMUSG00000022090" "ENSMUSG00000020486"
## [16] "ENSMUSG00000026421" "ENSMUSG00000043448" "ENSMUSG00000090639"
## [19] "ENSMUSG00000073680" "ENSMUSG00000064351" "ENSMUSG00000026255"
## [22] "ENSMUSG00000038155" "ENSMUSG00000052727" "ENSMUSG00000101111"
## [25] "ENSMUSG00000041607" "ENSMUSG00000100862" "ENSMUSG00000024810"
## [28] "ENSMUSG00000090996" "ENSMUSG00000083563" "ENSMUSG00000039904"
## [1] ""
##  [1] "ENSMUSG00000031610" "ENSMUSG00000026249" "ENSMUSG00000026424"
##  [4] "ENSMUSG00000029231" "ENSMUSG00000019874" "ENSMUSG00000106379"
##  [7] "ENSMUSG00000067879" "ENSMUSG00000032911" "ENSMUSG00000045532"
## [10] "ENSMUSG00000052229" "ENSMUSG00000079056" "ENSMUSG00000039830"
## [13] "ENSMUSG00000016995" "ENSMUSG00000032482" "ENSMUSG00000033208"
## [16] "ENSMUSG00000069763" "ENSMUSG00000068748" "ENSMUSG00000046160"
## [19] "ENSMUSG00000063297" "ENSMUSG00000017386" "ENSMUSG00000034164"
## [22] "ENSMUSG00000034353" "ENSMUSG00000034000" "ENSMUSG00000028655"
## [25] "ENSMUSG00000022122" "ENSMUSG00000069662" "ENSMUSG00000026955"
## [28] "ENSMUSG00000030317" "ENSMUSG00000006800" "ENSMUSG00000086596"
## [1] ""
## [1] ""

Determine statistically significant principal components

## Run the JackStraw test (100 replicates) to assess which principal
## components are statistically significant; progress is printed as it runs.
seurat_campbell_lfd <- JackStraw(
  object = seurat_campbell_lfd,
  num.replicate = 100,
  display.progress = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  1.89125941197077 mins
# Maximum number of PCs allowed = 20.


## Visualise the JackStraw results for PCs 1 to 20
JackStrawPlot(object = seurat_campbell_lfd, PCs = 1:20)
## Warning: Removed 20911 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_LFD 
##  23273 genes across 3134 samples.
## A less computationally intensive heuristic for choosing how many principal
## components (PCs) to keep is an elbow plot.
PCElbowPlot(object = seurat_campbell_lfd)

Cell clustering

## Cluster the cells on PCs 1-16 at resolution 0.6
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Report the stored clustering parameters
PrintFindClustersParams(object = seurat_campbell_lfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Compute the t-SNE embedding from PCs 1-16
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_lfd)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_lfd,
  do.label = TRUE,
  no.legend = TRUE
)

## 1986 variable genes, 20 PC = 14 clusters
## 1986 variable genes, 16 PC = 14 clusters

## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13 
## 639 617 496 347 206 200 122 120 107  89  77  50  35  29
#######
## Re-cluster the cells on PCs 1-16 at resolution 1.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Ask for the parameters of this resolution explicitly, so the printout
## describes the clustering computed just above rather than whichever stored
## run is reported by default.
## NOTE(review): assumes the Seurat v2 `resolution` argument of
## PrintFindClustersParams() - confirm against the installed version.
PrintFindClustersParams(object = seurat_campbell_lfd, resolution = 1.0)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Compute the t-SNE embedding from PCs 1-16
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_lfd)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_lfd,
  do.label = TRUE,
  no.legend = TRUE
)

## 1986 variable genes, 20 PC = 18 clusters
## 1986 variable genes, 16 PC = 16 clusters

## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 
## 584 508 491 312 225 206 141 122 113 107  89  86  50  36  35  29
#######
## Re-cluster the cells on PCs 1-16 at resolution 1.5
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Ask for the parameters of this resolution explicitly, so the printout
## describes the clustering computed just above rather than whichever stored
## run is reported by default.
## NOTE(review): assumes the Seurat v2 `resolution` argument of
## PrintFindClustersParams() - confirm against the installed version.
PrintFindClustersParams(object = seurat_campbell_lfd, resolution = 1.5)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Compute the t-SNE embedding from PCs 1-16
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_lfd)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_lfd,
  do.label = TRUE,
  no.legend = TRUE
)

## 1986 variable genes, 20 PC = 18 clusters
## 1986 variable genes, 16 PC = 18 clusters

## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 490 429 315 284 240 225 173 170 138 123 113 109  89  86  50  36  35  29
#######
## Re-cluster the cells on PCs 1-16 at resolution 2.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Ask for the parameters of this resolution explicitly, so the printout
## describes the clustering computed just above rather than whichever stored
## run is reported by default.
## NOTE(review): assumes the Seurat v2 `resolution` argument of
## PrintFindClustersParams() - confirm against the installed version.
PrintFindClustersParams(object = seurat_campbell_lfd, resolution = 2.0)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Compute the t-SNE embedding from PCs 1-16
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_lfd)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_lfd,
  do.label = TRUE,
  no.legend = TRUE
)

## 1986 variable genes, 20 PC = 20 clusters
## 1986 variable genes, 16 PC = 20 clusters

## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 380 287 255 253 242 224 205 173 168 140 139 123 113 107  89  86  50  36 
##  18  19 
##  35  29
## Final clustering: 1986 variable genes, 16 PCs, resolution = 1.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)


## t-SNE embedding for the final parameters
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)

Use 1986 variable genes and 16 principal components with a resolution of 1.0 (Changing number of PC makes very little difference to the clustering). This gives a total of 16 clusters.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

## Compare every cluster against all remaining cells, keeping both positive
## and negative markers (only.pos = FALSE).
seurat_campbell_lfd_biomarkers <- FindAllMarkers(
  object = seurat_campbell_lfd,
  only.pos = FALSE,
  min.pct = 0.2
)


## Top 10 markers per cluster, ranked by average log fold change
top10_seurat_campbell_markers <-
  seurat_campbell_lfd_biomarkers %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups:   cluster [16]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 2.19e-149     0.871 0.793 0.218 5.09e-145 0       ENSMUSG00000054459
##  2 1.06e-145     0.895 0.981 0.461 2.46e-141 0       ENSMUSG00000047261
##  3 6.87e-140     0.886 0.916 0.346 1.60e-135 0       ENSMUSG00000027273
##  4 2.39e-131     0.830 0.94  0.409 5.56e-127 0       ENSMUSG00000027500
##  5 2.15e-113     0.890 0.899 0.403 4.99e-109 0       ENSMUSG00000000159
##  6 2.58e-113     0.871 0.786 0.281 6.01e-109 0       ENSMUSG00000035864
##  7 6.48e-107     0.811 0.981 0.727 1.51e-102 0       ENSMUSG00000021268
##  8 1.03e-101     0.871 0.536 0.135 2.41e- 97 0       ENSMUSG00000028222
##  9 8.29e- 23     1.28  0.337 0.159 1.93e- 18 0       ENSMUSG00000020660
## 10 3.17e- 17     1.20  0.267 0.129 7.38e- 13 0       ENSMUSG00000004366
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_lfd_biomarkers), file = "seurat_campbell_lfd_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_lfd_biomarkers.csv", quote = FALSE)




## Repeat the marker search with the ROC test. This can take a long time.
seurat_campbell_lfd_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_lfd,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)

## Top 10 ROC markers per cluster, ranked by average log fold change
top10_seurat_campbell_markers_ROC <-
  seurat_campbell_lfd_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 160 x 9
## # Groups:   cluster [16]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.825    0.895 0.650     0.895 0.981 0.461        NA 0       ENSMUSG00…
##  2 0.805    0.886 0.61      0.886 0.916 0.346        NA 0       ENSMUSG00…
##  3 0.803    0.830 0.606     0.830 0.94  0.409        NA 0       ENSMUSG00…
##  4 0.789    0.811 0.578     0.811 0.981 0.727        NA 0       ENSMUSG00…
##  5 0.787    0.871 0.574     0.871 0.793 0.218        NA 0       ENSMUSG00…
##  6 0.78     0.890 0.56      0.890 0.899 0.403        NA 0       ENSMUSG00…
##  7 0.77     0.775 0.54      0.775 0.988 0.823        NA 0       ENSMUSG00…
##  8 0.761    0.871 0.522     0.871 0.786 0.281        NA 0       ENSMUSG00…
##  9 0.708    0.778 0.416     0.778 0.63  0.21         NA 0       ENSMUSG00…
## 10 0.702    0.871 0.404     0.871 0.536 0.135        NA 0       ENSMUSG00…
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_lfd_biomarkers_ROC), file = "seurat_campbell_lfd_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_lfd_biomarkers_ROC.csv", quote = FALSE)


## Heatmap of the top 10 marker genes selected per cluster
DoHeatmap(
  object = seurat_campbell_lfd,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)

Save seurat object of chow mice.

## save seurat object as .rds 
# saveRDS(seurat_campbell_lfd, file = "./seurat_campbell_lfd_final.rds")

Analysis of Campbell scRNA-seq data for mice on a high-fat diet (HFD; 1 replicate) using Seurat

Load HFD mouse data

## Read the previously saved Seurat object for the HFD sample
seurat_campbell_hfd <- readRDS(
  file = "./seurat_campbell_hfd_just_created.rds"
)

## Print a summary of the loaded object
seurat_campbell_hfd
## An object of class seurat in project CAMPBELL_HFD 
##  23685 genes across 3778 samples.

Quality control on HFD mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings on every R
## version (before R 4.0 read.csv() converts strings to factors by default).
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)


## Extract the mitochondrial gene IDs as a character vector.
## Subsetting a matrix with a factor selects rows by the factor's integer
## codes, i.e. by row position, not by gene ID - so the IDs must be character.
mito_genes <- as.character(mito_genes$id_with_url)


## Keep only the mitochondrial gene IDs that are present in this dataset.
## Matching against the row names avoids subscript errors for absent genes and
## does not depend on "NA" appearing in a row name.
mito_genes_present <- mito_genes[
  mito_genes %in% rownames(seurat_campbell_hfd@raw.data)
]

## Fail loudly if nothing matched: a mitochondrial fraction computed on zero
## genes would make the mitochondrial QC filter meaningless.
if (length(mito_genes_present) == 0) {
  stop("No mitochondrial gene IDs from mito_genes_table.csv were found in ",
       "the row names of seurat_campbell_hfd@raw.data", call. = FALSE)
}
## NOTE(review): percent_mito and the QC threshold applied to it later in this
## script depend on this selection - re-check them after re-running.

## Preview the raw counts of the selected genes in the first five cells
seurat_campbell_hfd@raw.data[mito_genes_present, 1:5]
##                    AAAAAATGGAGC AAAACGCGTGTC AAAACGGACAGA AAAACGTAACAG
## ENSMUSG00000000028            0            0            0            0
## ENSMUSG00000000037            0            0            0            0
## ENSMUSG00000000154            0            0            0            0
## ENSMUSG00000000149            1            0            0            0
## ENSMUSG00000000202            0            0            0            1
## ENSMUSG00000000159            0            1            5            0
## ENSMUSG00000000142            0            0            0            0
## ENSMUSG00000000127            0            1            0            0
## ENSMUSG00000000085            0            0            1            0
## ENSMUSG00000000194            0            0            0            0
## ENSMUSG00000000093            0            0            0            0
## ENSMUSG00000000168            0            0            0            0
## ENSMUSG00000000120            0            0            0            0
## ENSMUSG00000000134            0            0            0            0
## ENSMUSG00000000125            0            0            0            0
## ENSMUSG00000000197            0            0            0            0
## ENSMUSG00000000058            0            0            0            0
## ENSMUSG00000000167            0            0            0            0
## ENSMUSG00000000171            0            0            1            0
## ENSMUSG00000000056            0            0            0            0
## ENSMUSG00000000049            0            0            0            0
## ENSMUSG00000000184            0            0            1            3
## ENSMUSG00000000148            0            0            0            0
## ENSMUSG00000000078            0            0            0            1
## ENSMUSG00000000001            0            0            0            0
## ENSMUSG00000000223            0            0            0            0
## ENSMUSG00000000131            0            0            0            0
## ENSMUSG00000000183            0            0            0            0
## ENSMUSG00000000214            0            0            0            0
## ENSMUSG00000000088            0            0            1            0
## ENSMUSG00000000126            0            0            0            0
## ENSMUSG00000000031            0            0            0            0
##                    AAAACTAGTGGT
## ENSMUSG00000000028            0
## ENSMUSG00000000037            0
## ENSMUSG00000000154            0
## ENSMUSG00000000149            0
## ENSMUSG00000000202            0
## ENSMUSG00000000159            0
## ENSMUSG00000000142            0
## ENSMUSG00000000127            0
## ENSMUSG00000000085            0
## ENSMUSG00000000194            1
## ENSMUSG00000000093            0
## ENSMUSG00000000168            0
## ENSMUSG00000000120            0
## ENSMUSG00000000134            0
## ENSMUSG00000000125            0
## ENSMUSG00000000197            0
## ENSMUSG00000000058            0
## ENSMUSG00000000167            0
## ENSMUSG00000000171            0
## ENSMUSG00000000056            0
## ENSMUSG00000000049            0
## ENSMUSG00000000184            0
## ENSMUSG00000000148            0
## ENSMUSG00000000078            0
## ENSMUSG00000000001            1
## ENSMUSG00000000223            0
## ENSMUSG00000000131            0
## ENSMUSG00000000183            0
## ENSMUSG00000000214            0
## ENSMUSG00000000088            0
## ENSMUSG00000000126            0
## ENSMUSG00000000031            0
## Dimensions of the selected-gene subset of the raw count matrix
dim(seurat_campbell_hfd@raw.data[mito_genes_present, ])
## [1]   32 3778
## Per-cell share of counts coming from the selected mitochondrial genes.
## The value is a fraction, not a percentage (0.004 corresponds to 0.4%).
percent_mito <-
  Matrix::colSums(seurat_campbell_hfd@raw.data[mito_genes_present, ]) /
  Matrix::colSums(seurat_campbell_hfd@raw.data)


## Summary statistics of the mitochondrial fraction across cells
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0008024 0.0014544 0.0017117 0.0023004 0.0085837
## Store the mitochondrial fraction in the object's metadata
seurat_campbell_hfd <- AddMetaData(
  object = seurat_campbell_hfd,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## Inspect the first rows of the metadata
head(seurat_campbell_hfd@meta.data)
##              nGene nUMI   orig.ident replicate_name percent_mito
## AAAAAATGGAGC   921 1362 CAMPBELL_HFD     SRR5164440 0.0007342144
## AAAACGCGTGTC  1348 2221 CAMPBELL_HFD     SRR5164440 0.0009009009
## AAAACGGACAGA  2127 3652 CAMPBELL_HFD     SRR5164440 0.0024671053
## AAAACGTAACAG  1988 3575 CAMPBELL_HFD     SRR5164440 0.0013993843
## AAAACTAGTGGT  1723 3175 CAMPBELL_HFD     SRR5164440 0.0006301197
## AAAACTATCCCC  3099 6753 CAMPBELL_HFD     SRR5164440 0.0029638411
## Violin plots of genes per cell, UMIs per cell and mitochondrial fraction
VlnPlot(
  object = seurat_campbell_hfd,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)

## Scatter plots of nUMI against the mitochondrial fraction and against the
## number of genes, drawn side by side.
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_hfd, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_hfd, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_hfd,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)

GenePlot(
  object = seurat_campbell_hfd,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)

Filter cells after QC

Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 5000 genes expressed.

## Manual check of how many cells pass both upper thresholds
## (all cells are already known to have >800 genes)
with(
  seurat_campbell_hfd@meta.data,
  table(percent_mito < 0.004 & nGene < 5000)
)
## 
## FALSE  TRUE 
##   296  3482
# FALSE  TRUE 
#   296  3482 
 
## Keep cells with nGene between 800 and 5000 and percent_mito up to 0.004
## (0.4%); percent_mito has no lower bound.
seurat_campbell_hfd <- FilterCells(
  object = seurat_campbell_hfd,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(5000, 0.004)
)
 
## Print a summary of the filtered object
seurat_campbell_hfd
## An object of class seurat in project CAMPBELL_HFD 
##  23685 genes across 3482 samples.
# An object of class seurat in project CAMPBELL_HFD 
#  23685 genes across 3482 samples.

Log normalise gene expression per cell

## Histogram of total expression per cell before normalisation
hist(
  Matrix::colSums(seurat_campbell_hfd@data),
  breaks = 100,
  main = "Total expression before normalisation",
  xlab = "Sum of expression"
)

## Log-normalise gene expression per cell
seurat_campbell_hfd <- NormalizeData(
  object = seurat_campbell_hfd,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)

## Histogram of total expression per cell after normalisation.
## Matrix::colSums() sums the columns directly, so the expression matrix does
## not have to be copied into a dense matrix and then a data.frame first.
hist(
  Matrix::colSums(seurat_campbell_hfd@data),
  breaks = 100,
  main = "Total expression after normalisation",
  xlab = "Sum of expression"
)

Find genes whose expression varies between cells.

Find genes whose expression varies between cells; these genes will be used to construct the principal components that are then used for clustering.

## Identify variable genes with ExpMean / LogVMR as the mean and dispersion
## functions, 20 equal-width bins, and the cutoffs given below.
seurat_campbell_hfd <- FindVariableGenes(
  object = seurat_campbell_hfd,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  num.bin = 20,
  binning.method = "equal_width",
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75
)

## How many genes were selected as variable
length(seurat_campbell_hfd@var.genes)
## [1] 1454

seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 1459 variable genes

seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width")

                      == 2305 variable genes

seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")

                      == 1941 variable genes

seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width")

                      == 3085 variable genes

seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width")

                      == 3034 variable genes

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI and percent_mito and regressing them out (mouse replicate is not included in the regression below).

## Scale the data, regressing out nUMI and percent_mito
seurat_campbell_hfd <- ScaleData(
  object = seurat_campbell_hfd,
  vars.to.regress = c("nUMI", "percent_mito")
)
## Regressing out: nUMI, percent_mito
## 
## Time Elapsed:  31.4707977771759 secs
## Scaling data matrix

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA restricted to the variable genes, printing 5 genes for each of
## the first 5 principal components
seurat_campbell_hfd <- RunPCA(
  object = seurat_campbell_hfd,
  pc.genes = seurat_campbell_hfd@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000026385" "ENSMUSG00000031765" "ENSMUSG00000031762"
## [4] "ENSMUSG00000001025" "ENSMUSG00000026701"
## [1] ""
## [1] "ENSMUSG00000050711" "ENSMUSG00000043388" "ENSMUSG00000027350"
## [4] "ENSMUSG00000024261" "ENSMUSG00000035864"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000050711" "ENSMUSG00000055254" "ENSMUSG00000037852"
## [4] "ENSMUSG00000043388" "ENSMUSG00000031760"
## [1] ""
## [1] "ENSMUSG00000027199" "ENSMUSG00000027375" "ENSMUSG00000037625"
## [4] "ENSMUSG00000032854" "ENSMUSG00000076439"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000032060" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000032854"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000029484" "ENSMUSG00000079018"
## [4] "ENSMUSG00000041378" "ENSMUSG00000075602"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036905" "ENSMUSG00000036887" "ENSMUSG00000036896"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] "ENSMUSG00000041378" "ENSMUSG00000079018" "ENSMUSG00000030237"
## [4] "ENSMUSG00000030235" "ENSMUSG00000020154"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000072674" "ENSMUSG00000027744"
## [1] ""
## [1] "ENSMUSG00000022132" "ENSMUSG00000029838" "ENSMUSG00000022528"
## [4] "ENSMUSG00000024518" "ENSMUSG00000058897"
## [1] ""
## [1] ""
## Top genes associated with principal components 1 to 9
VizPCA(object = seurat_campbell_hfd, pcs.use = 1:9)

## Cells plotted on principal component 1 vs 2
PCAPlot(object = seurat_campbell_hfd, dim.1 = 1, dim.2 = 2)

## Cells plotted on principal component 2 vs 3
PCAPlot(object = seurat_campbell_hfd, dim.1 = 2, dim.2 = 3)

## Heat map of gene expression for the genes of principal component 1
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for the genes of principal component 2
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for the genes of principal component 3
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat map of gene expression for the genes of principal component 4
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heat maps of gene expression for principal components 5 to 18 in one call
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 5:18,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)

## Project the PCA computed on the variable genes onto the entire dataset
## (all genes), printing the genes for each component
seurat_campbell_hfd <- ProjectPCA(
  object = seurat_campbell_hfd,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
##  [4] "ENSMUSG00000031762" "ENSMUSG00000001025" "ENSMUSG00000026701"
##  [7] "ENSMUSG00000035805" "ENSMUSG00000050953" "ENSMUSG00000026728"
## [10] "ENSMUSG00000000567" "ENSMUSG00000067786" "ENSMUSG00000083138"
## [13] "ENSMUSG00000018593" "ENSMUSG00000058135" "ENSMUSG00000095538"
## [16] "ENSMUSG00000027712" "ENSMUSG00000005360" "ENSMUSG00000034467"
## [19] "ENSMUSG00000022528" "ENSMUSG00000026649" "ENSMUSG00000018102"
## [22] "ENSMUSG00000053931" "ENSMUSG00000017009" "ENSMUSG00000029838"
## [25] "ENSMUSG00000044080" "ENSMUSG00000036570" "ENSMUSG00000032231"
## [28] "ENSMUSG00000030342" "ENSMUSG00000031342" "ENSMUSG00000055254"
## [1] ""
##  [1] "ENSMUSG00000033061" "ENSMUSG00000027581" "ENSMUSG00000050711"
##  [4] "ENSMUSG00000047261" "ENSMUSG00000025468" "ENSMUSG00000022577"
##  [7] "ENSMUSG00000026576" "ENSMUSG00000024268" "ENSMUSG00000019986"
## [10] "ENSMUSG00000029223" "ENSMUSG00000021087" "ENSMUSG00000043388"
## [13] "ENSMUSG00000042750" "ENSMUSG00000040785" "ENSMUSG00000055430"
## [16] "ENSMUSG00000044349" "ENSMUSG00000027500" "ENSMUSG00000036699"
## [19] "ENSMUSG00000019923" "ENSMUSG00000027350" "ENSMUSG00000035964"
## [22] "ENSMUSG00000027273" "ENSMUSG00000031840" "ENSMUSG00000048978"
## [25] "ENSMUSG00000060188" "ENSMUSG00000000159" "ENSMUSG00000033981"
## [28] "ENSMUSG00000020297" "ENSMUSG00000022658" "ENSMUSG00000059361"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000018451" "ENSMUSG00000021268" "ENSMUSG00000026223"
##  [4] "ENSMUSG00000031428" "ENSMUSG00000033585" "ENSMUSG00000039278"
##  [7] "ENSMUSG00000019986" "ENSMUSG00000026576" "ENSMUSG00000040785"
## [10] "ENSMUSG00000050071" "ENSMUSG00000006373" "ENSMUSG00000042750"
## [13] "ENSMUSG00000046432" "ENSMUSG00000055430" "ENSMUSG00000025579"
## [16] "ENSMUSG00000002265" "ENSMUSG00000022577" "ENSMUSG00000029223"
## [19] "ENSMUSG00000033061" "ENSMUSG00000043384" "ENSMUSG00000072964"
## [22] "ENSMUSG00000034723" "ENSMUSG00000050711" "ENSMUSG00000055254"
## [25] "ENSMUSG00000049422" "ENSMUSG00000024268" "ENSMUSG00000025468"
## [28] "ENSMUSG00000019923" "ENSMUSG00000058254" "ENSMUSG00000037852"
## [1] ""
##  [1] "ENSMUSG00000027199" "ENSMUSG00000027375" "ENSMUSG00000037625"
##  [4] "ENSMUSG00000032854" "ENSMUSG00000031425" "ENSMUSG00000076439"
##  [7] "ENSMUSG00000031775" "ENSMUSG00000036634" "ENSMUSG00000026830"
## [10] "ENSMUSG00000022548" "ENSMUSG00000032517" "ENSMUSG00000027562"
## [13] "ENSMUSG00000050121" "ENSMUSG00000090639" "ENSMUSG00000022425"
## [16] "ENSMUSG00000020486" "ENSMUSG00000032060" "ENSMUSG00000033579"
## [19] "ENSMUSG00000041607" "ENSMUSG00000073680" "ENSMUSG00000006782"
## [22] "ENSMUSG00000046160" "ENSMUSG00000015090" "ENSMUSG00000070354"
## [25] "ENSMUSG00000037166" "ENSMUSG00000026888" "ENSMUSG00000020774"
## [28] "ENSMUSG00000022090" "ENSMUSG00000013523" "ENSMUSG00000027858"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000037625" "ENSMUSG00000032060" "ENSMUSG00000076439"
##  [4] "ENSMUSG00000026830" "ENSMUSG00000031425" "ENSMUSG00000032854"
##  [7] "ENSMUSG00000050121" "ENSMUSG00000006782" "ENSMUSG00000040759"
## [10] "ENSMUSG00000036634" "ENSMUSG00000032517" "ENSMUSG00000046160"
## [13] "ENSMUSG00000033579" "ENSMUSG00000015090" "ENSMUSG00000025203"
## [16] "ENSMUSG00000116358" "ENSMUSG00000073680" "ENSMUSG00000027858"
## [19] "ENSMUSG00000031342" "ENSMUSG00000068923" "ENSMUSG00000020774"
## [22] "ENSMUSG00000013523" "ENSMUSG00000022425" "ENSMUSG00000041607"
## [25] "ENSMUSG00000037166" "ENSMUSG00000030701" "ENSMUSG00000043448"
## [28] "ENSMUSG00000027562" "ENSMUSG00000050854" "ENSMUSG00000090996"
## [1] ""
##  [1] "ENSMUSG00000020077" "ENSMUSG00000029484" "ENSMUSG00000079018"
##  [4] "ENSMUSG00000041378" "ENSMUSG00000075602" "ENSMUSG00000030237"
##  [7] "ENSMUSG00000030235" "ENSMUSG00000040584" "ENSMUSG00000020154"
## [10] "ENSMUSG00000056492" "ENSMUSG00000001123" "ENSMUSG00000058715"
## [13] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000032359"
## [16] "ENSMUSG00000016494" "ENSMUSG00000030579" "ENSMUSG00000036896"
## [19] "ENSMUSG00000028581" "ENSMUSG00000114487" "ENSMUSG00000024621"
## [22] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000036256"
## [25] "ENSMUSG00000022584" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [28] "ENSMUSG00000020717" "ENSMUSG00000024397" "ENSMUSG00000033960"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000036905" "ENSMUSG00000036887" "ENSMUSG00000036896"
##  [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000028581"
##  [7] "ENSMUSG00000024621" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [10] "ENSMUSG00000024397" "ENSMUSG00000038642" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000048163" "ENSMUSG00000046805"
## [16] "ENSMUSG00000059498" "ENSMUSG00000000682" "ENSMUSG00000052160"
## [19] "ENSMUSG00000054675" "ENSMUSG00000002111" "ENSMUSG00000030786"
## [22] "ENSMUSG00000040747" "ENSMUSG00000040229" "ENSMUSG00000036908"
## [25] "ENSMUSG00000044811" "ENSMUSG00000027848" "ENSMUSG00000069516"
## [28] "ENSMUSG00000018008" "ENSMUSG00000021665" "ENSMUSG00000026786"
## [1] ""
##  [1] "ENSMUSG00000041378" "ENSMUSG00000079018" "ENSMUSG00000030237"
##  [4] "ENSMUSG00000030235" "ENSMUSG00000020154" "ENSMUSG00000040584"
##  [7] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000039167"
## [10] "ENSMUSG00000001946" "ENSMUSG00000033960" "ENSMUSG00000022584"
## [13] "ENSMUSG00000036256" "ENSMUSG00000029648" "ENSMUSG00000114487"
## [16] "ENSMUSG00000020717" "ENSMUSG00000000805" "ENSMUSG00000039349"
## [19] "ENSMUSG00000054690" "ENSMUSG00000042116" "ENSMUSG00000031239"
## [22] "ENSMUSG00000027435" "ENSMUSG00000006386" "ENSMUSG00000026814"
## [25] "ENSMUSG00000061353" "ENSMUSG00000062960" "ENSMUSG00000024168"
## [28] "ENSMUSG00000024140" "ENSMUSG00000026193" "ENSMUSG00000051669"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
##  [4] "ENSMUSG00000072674" "ENSMUSG00000027744" "ENSMUSG00000095304"
##  [7] "ENSMUSG00000045655" "ENSMUSG00000108841" "ENSMUSG00000047671"
## [10] "ENSMUSG00000044772" "ENSMUSG00000020473" "ENSMUSG00000032595"
## [13] "ENSMUSG00000110332" "ENSMUSG00000027800" "ENSMUSG00000027867"
## [16] "ENSMUSG00000029182" "ENSMUSG00000038370" "ENSMUSG00000072473"
## [19] "ENSMUSG00000085416" "ENSMUSG00000051606" "ENSMUSG00000047139"
## [22] "ENSMUSG00000047394" "ENSMUSG00000037716" "ENSMUSG00000027360"
## [25] "ENSMUSG00000056174" "ENSMUSG00000046242" "ENSMUSG00000044475"
## [28] "ENSMUSG00000041323" "ENSMUSG00000060981" "ENSMUSG00000086742"
## [1] ""
##  [1] "ENSMUSG00000022132" "ENSMUSG00000029838" "ENSMUSG00000022528"
##  [4] "ENSMUSG00000024518" "ENSMUSG00000058897" "ENSMUSG00000021250"
##  [7] "ENSMUSG00000063564" "ENSMUSG00000007682" "ENSMUSG00000018451"
## [10] "ENSMUSG00000045005" "ENSMUSG00000024190" "ENSMUSG00000035686"
## [13] "ENSMUSG00000033737" "ENSMUSG00000038418" "ENSMUSG00000034810"
## [16] "ENSMUSG00000017390" "ENSMUSG00000047786" "ENSMUSG00000061718"
## [19] "ENSMUSG00000028195" "ENSMUSG00000030629" "ENSMUSG00000003545"
## [22] "ENSMUSG00000027004" "ENSMUSG00000055653" "ENSMUSG00000052684"
## [25] "ENSMUSG00000034640" "ENSMUSG00000022419" "ENSMUSG00000041891"
## [28] "ENSMUSG00000053560" "ENSMUSG00000050708" "ENSMUSG00000030428"
## [1] ""
## [1] ""

Determine statistically significant principal components

## JackStraw test (100 replicates, with progress bar) to assess which
## principal components are statistically significant
seurat_campbell_hfd <- JackStraw(
  object = seurat_campbell_hfd,
  num.replicate = 100,
  display.progress = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  2.13899685939153 mins
# Maximum number of PCs allowed = 20.


## JackStraw plots for principal components 1 to 20
JackStrawPlot(
  object = seurat_campbell_hfd,
  PCs = 1:20
)
## Warning: Removed 20637 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_HFD 
##  23685 genes across 3482 samples.
## Elbow plot: a less computationally intensive heuristic for choosing the
## statistically significant principal components
PCElbowPlot(
  object = seurat_campbell_hfd
)

Cell clustering

## Cluster the cells on principal components 1 to 14 at resolution 0.6,
## saving the SNN and forcing a recalculation
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Report the parameters recorded for the FindClusters run
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Compute the t-SNE embedding from the same 14 PCs used for clustering
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)

## Labelled t-SNE without legend
## (plain alternative: TSNEPlot(object = seurat_campbell_hfd))
TSNEPlot(object = seurat_campbell_hfd, no.legend = TRUE, do.label = TRUE)

## Results noted for resolution 0.6:
## 1454 variable genes, 20 PC = 14 clusters
## 1454 variable genes, 14 PC = 14 clusters

## Number of cells per cluster
table(seurat_campbell_hfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13 
## 744 629 620 308 225 168 148 139 137 133  76  66  51  38
#######
## Re-cluster on principal components 1-14 at resolution 1.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters run.
## NOTE(review): the rendered output of this call still reports
## "Resolution: 0.6" with the timestamp of the first run - confirm which
## stored run is being printed here.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Compute the t-SNE embedding from the same 14 PCs used for clustering
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)

## Labelled t-SNE without legend
## (plain alternative: TSNEPlot(object = seurat_campbell_hfd))
TSNEPlot(object = seurat_campbell_hfd, no.legend = TRUE, do.label = TRUE)

## Results noted for resolution 1.0:
## 1454 variable genes, 20 PC = 16 clusters
## 1454 variable genes, 14 PC = 16 clusters

## Number of cells per cluster
table(seurat_campbell_hfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 
## 738 623 400 226 223 177 156 154 148 139 137 133  76  63  51  38
#######
## Re-cluster on principal components 1-14 at resolution 1.5
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters run.
## NOTE(review): the rendered output of this call still reports
## "Resolution: 0.6" with the timestamp of the first run - confirm which
## stored run is being printed here.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Compute the t-SNE embedding from the same 14 PCs used for clustering
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)

## Labelled t-SNE without legend
## (plain alternative: TSNEPlot(object = seurat_campbell_hfd))
TSNEPlot(object = seurat_campbell_hfd, no.legend = TRUE, do.label = TRUE)

## Results noted for resolution 1.5:
## 1454 variable genes, 20 PC = 20 clusters
## 1454 variable genes, 14 PC = 18 clusters

## Number of cells per cluster
table(seurat_campbell_hfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 549 400 363 256 227 223 198 167 157 154 148 140 137 133  76  63  51  40
#######
## Re-cluster on principal components 1-14 at resolution 2.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

## Print the parameters stored for the FindClusters run.
## NOTE(review): the rendered output of this call still reports
## "Resolution: 0.6" with the timestamp of the first run - confirm which
## stored run is being printed here.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Compute the t-SNE embedding from the same 14 PCs used for clustering
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)

## Labelled t-SNE without legend
## (plain alternative: TSNEPlot(object = seurat_campbell_hfd))
TSNEPlot(object = seurat_campbell_hfd, no.legend = TRUE, do.label = TRUE)

## Results noted for resolution 2.0:
## 1454 variable genes, 20 PC = 21 clusters
## 1454 variable genes, 14 PC = 21 clusters

## Number of cells per cluster
table(seurat_campbell_hfd@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 344 331 244 239 232 227 202 199 191 174 157 152 139 137 133 118  76  66 
##  18  19  20 
##  51  40  30
## Final clustering: 1454 variable genes, 14 PCs, resolution = 1.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)


## t-SNE embedding for the final parameters
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)

Use 1454 variable genes and 14 principal components with a resolution of 1.0 (Changing number of PC makes almost no difference to the clustering). This gives a total of 16 clusters.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

## Compare every cluster against all remaining cells and keep both positive
## and negative markers (only.pos = FALSE)
seurat_campbell_hfd_biomarkers <- FindAllMarkers(
  object = seurat_campbell_hfd,
  only.pos = FALSE,
  min.pct = 0.2
)


## Keep, per cluster, the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_hfd_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups:   cluster [16]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 4.15e-142     0.909 0.943 0.503 9.82e-138 0       ENSMUSG00000000159
##  2 7.49e-118     0.658 0.973 0.582 1.77e-113 0       ENSMUSG00000047261
##  3 1.07e-106     0.603 0.978 0.774 2.53e-102 0       ENSMUSG00000055430
##  4 9.01e-101     0.605 0.932 0.499 2.14e- 96 0       ENSMUSG00000027500
##  5 8.43e- 93     0.570 0.881 0.427 2.00e- 88 0       ENSMUSG00000048978
##  6 6.28e- 91     0.596 0.64  0.227 1.49e- 86 0       ENSMUSG00000021032
##  7 3.92e- 23     0.896 0.215 0.087 9.27e- 19 0       ENSMUSG00000032291
##  8 1.10e- 22     0.701 0.375 0.203 2.61e- 18 0       ENSMUSG00000020660
##  9 9.08e- 22     0.683 0.404 0.229 2.15e- 17 0       ENSMUSG00000021647
## 10 6.81e- 17     1.09  0.289 0.158 1.61e- 12 0       ENSMUSG00000004366
## # ... with 150 more rows
## Export the full marker table and the per-cluster top 10 as CSV
write.csv(
  as.data.frame(seurat_campbell_hfd_biomarkers),
  file = "seurat_campbell_hfd_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_hfd_biomarkers.csv",
  quote = FALSE
)




## Repeat the marker search with the ROC test. This can take a long time.
seurat_campbell_hfd_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_hfd,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)

## Keep, per cluster, the 10 ROC markers with the largest avg_logFC
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_hfd_biomarkers_ROC, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 160 x 9
## # Groups:   cluster [16]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.794    0.909 0.588     0.909 0.943 0.503        NA 0       ENSMUSG00…
##  2 0.771    0.658 0.542     0.658 0.973 0.582        NA 0       ENSMUSG00…
##  3 0.762    0.603 0.524     0.603 0.978 0.774        NA 0       ENSMUSG00…
##  4 0.746    0.605 0.492     0.605 0.932 0.499        NA 0       ENSMUSG00…
##  5 0.737    0.548 0.474     0.548 0.991 0.771        NA 0       ENSMUSG00…
##  6 0.736    0.558 0.472     0.558 0.921 0.474        NA 0       ENSMUSG00…
##  7 0.731    0.570 0.462     0.570 0.881 0.427        NA 0       ENSMUSG00…
##  8 0.729    0.551 0.458     0.551 0.935 0.561        NA 0       ENSMUSG00…
##  9 0.727    0.533 0.454     0.533 0.972 0.547        NA 0       ENSMUSG00…
## 10 0.722    0.555 0.444     0.555 0.989 0.88         NA 0       ENSMUSG00…
## # ... with 150 more rows
## Export the ROC marker table and its per-cluster top 10 as CSV
write.csv(
  as.data.frame(seurat_campbell_hfd_biomarkers_ROC),
  file = "seurat_campbell_hfd_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_hfd_biomarkers_ROC.csv",
  quote = FALSE
)


## Heatmap of the top 10 differentially expressed genes per cluster
## (taken from the default-test table, not the ROC table)
DoHeatmap(
  object = seurat_campbell_hfd,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)

Save seurat object of HFD mice.

## Write the processed HFD seurat object to disk as .rds
saveRDS(
  seurat_campbell_hfd,
  file = "./seurat_campbell_hfd_final.rds"
)

Analysis of Campbell scRNAseq data for mice fasted for 24h and then REFED for 2h (1 replicate) using seurat

Load REFED mouse data

## Read the freshly created REFED seurat object from disk
seurat_campbell_refed <- readRDS(
  file = "./seurat_campbell_refed_just_created.rds"
)

## Print a summary of the loaded object
seurat_campbell_refed
## An object of class seurat in project CAMPBELL_REFED 
##  19267 genes across 2375 samples.

Quality control on REFED mouse data

## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings. A factor used
## as a row index is matched by its integer codes rather than by its labels,
## which selects rows by position. The previously rendered preview of this
## subset listed ENSMUSG00000000001, ENSMUSG00000000028, ... (the first rows
## of the matrix) instead of mitochondrial genes.
mito_genes <- read.csv("mito_genes_table.csv",
                       header = TRUE,
                       stringsAsFactors = FALSE)


## Make a character vector of mitochondrial gene IDs.
## NOTE(review): assumes the id_with_url column holds bare Ensembl gene IDs
## identical to the row names of raw.data - confirm against the CSV.
mito_genes <- unique(as.character(mito_genes$id_with_url))


## Identify which mitochondrially encoded gene IDs are present in the dataset
## by matching them against the row names of the raw count matrix.
mito_genes_present <- mito_genes[mito_genes %in% rownames(seurat_campbell_refed@raw.data)]

## Fail loudly instead of computing a meaningless mitochondrial fraction.
if (length(mito_genes_present) == 0) {
  stop("None of the mitochondrial gene IDs match rownames of raw.data",
       call. = FALSE)
}

## Preview the mitochondrial counts for the first five cells.
## NOTE(review): percent_mito cut-offs chosen from the earlier (positional)
## selection should be re-checked once this subset is corrected.
seurat_campbell_refed@raw.data[mito_genes_present, 1:5]
##                    AAAAATACCGGC AAAAATAGGCGT AAAAATAGGGTT AAAACATTACTT
## ENSMUSG00000000028            0            0            0            0
## ENSMUSG00000000037            0            0            0            0
## ENSMUSG00000000154            0            0            0            0
## ENSMUSG00000000149            0            0            0            0
## ENSMUSG00000000214            0            0            0            1
## ENSMUSG00000000159            0            0            2            2
## ENSMUSG00000000142            0            0            0            0
## ENSMUSG00000000127            0            1            1            0
## ENSMUSG00000000085            1            0            0            0
## ENSMUSG00000000197            0            0            1            1
## ENSMUSG00000000093            0            0            0            0
## ENSMUSG00000000168            0            0            0            0
## ENSMUSG00000000120            0            0            0            0
## ENSMUSG00000000134            0            0            0            0
## ENSMUSG00000000125            0            0            0            0
## ENSMUSG00000000202            0            0            0            0
## ENSMUSG00000000058            0            0            0            0
## ENSMUSG00000000167            0            0            0            0
## ENSMUSG00000000171            1            0            0            0
## ENSMUSG00000000056            0            1            0            0
## ENSMUSG00000000049            0            0            0            0
## ENSMUSG00000000194            0            0            0            0
## ENSMUSG00000000148            0            0            0            0
## ENSMUSG00000000078            1            0            0            0
## ENSMUSG00000000001            0            0            0            0
## ENSMUSG00000000247            0            0            0            0
## ENSMUSG00000000131            0            0            0            0
## ENSMUSG00000000184            0            1            0            0
## ENSMUSG00000000223            0            0            2            0
## ENSMUSG00000000088            0            0            0            0
## ENSMUSG00000000126            0            0            0            0
## ENSMUSG00000000031            0            0            0            0
##                    AAAAGACTAGTA
## ENSMUSG00000000028            0
## ENSMUSG00000000037            0
## ENSMUSG00000000154            0
## ENSMUSG00000000149            0
## ENSMUSG00000000214            0
## ENSMUSG00000000159            1
## ENSMUSG00000000142            0
## ENSMUSG00000000127            0
## ENSMUSG00000000085            0
## ENSMUSG00000000197            0
## ENSMUSG00000000093            0
## ENSMUSG00000000168            0
## ENSMUSG00000000120            0
## ENSMUSG00000000134            0
## ENSMUSG00000000125            0
## ENSMUSG00000000202            0
## ENSMUSG00000000058            0
## ENSMUSG00000000167            0
## ENSMUSG00000000171            0
## ENSMUSG00000000056            0
## ENSMUSG00000000049            0
## ENSMUSG00000000194            0
## ENSMUSG00000000148            0
## ENSMUSG00000000078            0
## ENSMUSG00000000001            0
## ENSMUSG00000000247            0
## ENSMUSG00000000131            0
## ENSMUSG00000000184            0
## ENSMUSG00000000223            0
## ENSMUSG00000000088            0
## ENSMUSG00000000126            0
## ENSMUSG00000000031            0
## Dimensions of the raw count matrix restricted to the rows selected by mito_genes_present
dim(seurat_campbell_refed@raw.data[mito_genes_present, ])
## [1]   32 2375
## Per-cell share of counts that comes from the selected mitochondrial genes.
## The value is stored as a fraction of the cell's total counts (not x100).
mito_counts_per_cell <- Matrix::colSums(seurat_campbell_refed@raw.data[mito_genes_present, ])
total_counts_per_cell <- Matrix::colSums(seurat_campbell_refed@raw.data)
percent_mito <- mito_counts_per_cell / total_counts_per_cell


## Summary statistics of the mitochondrial fraction across cells
summary(percent_mito)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0008132 0.0015576 0.0018138 0.0025196 0.0097259
## Store the mitochondrial fraction in the object's metadata as "percent_mito"
seurat_campbell_refed <- AddMetaData(
  object = seurat_campbell_refed,
  metadata = percent_mito,
  col.name = "percent_mito"
)


## Inspect the first rows of the seurat object metadata
head(seurat_campbell_refed@meta.data)
##              nGene nUMI     orig.ident replicate_name percent_mito
## AAAAATACCGGC  1271 2224 CAMPBELL_REFED     SRR5164442 0.0013519603
## AAAAATAGGCGT  1088 1900 CAMPBELL_REFED     SRR5164442 0.0015797788
## AAAAATAGGGTT  3056 6629 CAMPBELL_REFED     SRR5164442 0.0009057971
## AAAACATTACTT   841 1535 CAMPBELL_REFED     SRR5164442 0.0026058632
## AAAAGACTAGTA  1195 1925 CAMPBELL_REFED     SRR5164442 0.0005208333
## AAAAGATCATAC   966 1827 CAMPBELL_REFED     SRR5164442 0.0005473454
## Violin plots of the number of genes, number of UMIs and mitochondrial
## fraction per cell
VlnPlot(
  object = seurat_campbell_refed,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)

## Scatter plots of nUMI against the mitochondrial fraction and against the
## number of genes, laid out side by side
par(mfrow = c(1, 2))
# Interactive alternatives:
# GenePlot(object = seurat_campbell_refed, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_refed, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_refed,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)

GenePlot(
  object = seurat_campbell_refed,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)

Filter cells after QC

Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes or with more than 3500 genes expressed.

## Manual check of how many cells pass both cut-offs; all cells are already
## known to have >800 genes, so only the upper limits are tested here
with(
  seurat_campbell_refed@meta.data,
  table(percent_mito < 0.004 & nGene < 3500)
)
# Recorded result: FALSE = 201, TRUE = 2174

## Keep cells with 800-3500 genes and percent_mito below 0.004 (0.4%)
seurat_campbell_refed <- FilterCells(
  object = seurat_campbell_refed,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(3500, 0.004)
)

## Print a summary of the filtered object
seurat_campbell_refed
## An object of class seurat in project CAMPBELL_REFED 
##  19267 genes across 2174 samples.
# An object of class seurat in project CAMPBELL_REFED 
#  19267 genes across 2174 samples.

Log normalise gene expression per cell

## Histogram of total expression per cell before normalisation.
## Matrix::colSums is used for both histograms so the column sums are taken
## directly on the stored matrix, matching the percent_mito calculation.
hist(Matrix::colSums(seurat_campbell_refed@data),
     breaks = 100,
     main = "Total expression before normalisation",
     xlab = "Sum of expression")

## Log-normalise gene expression per cell with a scale factor of 10000
seurat_campbell_refed <- NormalizeData(object = seurat_campbell_refed,
                                       normalization.method = "LogNormalize",
                                       scale.factor = 10000)

## Histogram of total expression per cell after normalisation.
## Summing the matrix directly avoids converting the whole expression matrix
## to a dense matrix and then to a data frame just to take column sums.
hist(Matrix::colSums(seurat_campbell_refed@data),
     breaks = 100,
     main = "Total expression after normalisation",
     xlab = "Sum of expression")

Find genes whose expression varies between cells.

Find genes whose expression varies between cells; these will be used to construct the principal components that are used for clustering.

## Select variable genes using the mean expression and dispersion cut-offs
seurat_campbell_refed <- FindVariableGenes(
  object = seurat_campbell_refed,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05, x.high.cutoff = 3,
  y.cutoff = 0.75,
  num.bin = 20, binning.method = "equal_width"
)

# How many variable genes were selected
length(seurat_campbell_refed@var.genes)
## [1] 1451

seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1456 variable genes

seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 2293 variable genes

seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )

                      == 1916 variable genes

seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )

                      == 2916 variable genes

seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )

                      == 2831 variable genes

Scale normalised gene expression per cell to remove unwanted sources of variation

Scale gene expression per cell by building linear models for nUMI and percent_mito (the REFED data set has a single replicate, so mouse replicate is not regressed out).

## Scale the data, regressing out nUMI and percent_mito
seurat_campbell_refed <- ScaleData(
  object = seurat_campbell_refed,
  vars.to.regress = c("nUMI", "percent_mito")
)
## Regressing out: nUMI, percent_mito
## 
## Time Elapsed:  22.1721687316895 secs
## Scaling data matrix

Principal component analysis of variable genes.

Principal component analysis of variable genes for use in cell clustering.

## Run PCA on the variable genes and print the top 5 genes for PCs 1-5
seurat_campbell_refed <- RunPCA(
  object = seurat_campbell_refed,
  pc.genes = seurat_campbell_refed@var.genes,
  do.print = TRUE, pcs.print = 1:5, genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000027500" "ENSMUSG00000027350" "ENSMUSG00000035864"
## [4] "ENSMUSG00000040856" "ENSMUSG00000026787"
## [1] ""
## [1] "ENSMUSG00000026385" "ENSMUSG00000031765" "ENSMUSG00000050953"
## [4] "ENSMUSG00000018593" "ENSMUSG00000000567"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000079018" "ENSMUSG00000041378" "ENSMUSG00000040584"
## [4] "ENSMUSG00000020154" "ENSMUSG00000056492"
## [1] ""
## [1] "ENSMUSG00000055254" "ENSMUSG00000067786" "ENSMUSG00000000567"
## [4] "ENSMUSG00000035805" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000027350" "ENSMUSG00000027800" "ENSMUSG00000075602"
## [4] "ENSMUSG00000023175" "ENSMUSG00000038370"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000036634" "ENSMUSG00000031425"
## [4] "ENSMUSG00000032517" "ENSMUSG00000032854"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000020423" "ENSMUSG00000003545" "ENSMUSG00000022528"
## [4] "ENSMUSG00000022132" "ENSMUSG00000026185"
## [1] ""
## [1] "ENSMUSG00000043164" "ENSMUSG00000070306" "ENSMUSG00000094800"
## [4] "ENSMUSG00000110332" "ENSMUSG00000020473"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000029838" "ENSMUSG00000047786" "ENSMUSG00000055254"
## [4] "ENSMUSG00000063564" "ENSMUSG00000007682"
## [1] ""
## [1] "ENSMUSG00000056973" "ENSMUSG00000028298" "ENSMUSG00000032532"
## [4] "ENSMUSG00000032081" "ENSMUSG00000045394"
## [1] ""
## [1] ""
## Show the top genes associated with principal components 1-9
VizPCA(
  object = seurat_campbell_refed,
  pcs.use = 1:9
)

## Scatter plot of principal component 1 vs 2
PCAPlot(
  object = seurat_campbell_refed,
  dim.1 = 1,
  dim.2 = 2
)

## Scatter plot of principal component 2 vs 3
PCAPlot(
  object = seurat_campbell_refed,
  dim.1 = 2,
  dim.2 = 3
)

## Heatmap of gene expression for the genes of principal component 1
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 2
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 3
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmap of gene expression for the genes of principal component 4
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter

# Ignore warnings!


## Heatmaps of gene expression for the genes of principal components 5-18
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 5:18, cells.use = 500,
  do.balanced = TRUE, label.columns = FALSE
)

## Project the pre-computed PCA (built from the variable genes identified
## earlier) onto the entire dataset (all genes)
seurat_campbell_refed <- ProjectPCA(
  object = seurat_campbell_refed,
  do.print = TRUE
)
## [1] "PC1"
##  [1] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000044349"
##  [4] "ENSMUSG00000021268" "ENSMUSG00000019986" "ENSMUSG00000055430"
##  [7] "ENSMUSG00000027581" "ENSMUSG00000021087" "ENSMUSG00000029223"
## [10] "ENSMUSG00000040785" "ENSMUSG00000026576" "ENSMUSG00000043388"
## [13] "ENSMUSG00000024268" "ENSMUSG00000047261" "ENSMUSG00000024261"
## [16] "ENSMUSG00000027273" "ENSMUSG00000019923" "ENSMUSG00000042750"
## [19] "ENSMUSG00000025468" "ENSMUSG00000000159" "ENSMUSG00000036699"
## [22] "ENSMUSG00000002265" "ENSMUSG00000022577" "ENSMUSG00000107169"
## [25] "ENSMUSG00000024423" "ENSMUSG00000025579" "ENSMUSG00000027500"
## [28] "ENSMUSG00000003363" "ENSMUSG00000027350" "ENSMUSG00000033981"
## [1] ""
##  [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
##  [4] "ENSMUSG00000050953" "ENSMUSG00000018593" "ENSMUSG00000000567"
##  [7] "ENSMUSG00000026728" "ENSMUSG00000001025" "ENSMUSG00000058135"
## [10] "ENSMUSG00000031762" "ENSMUSG00000026701" "ENSMUSG00000035805"
## [13] "ENSMUSG00000030342" "ENSMUSG00000005360" "ENSMUSG00000008575"
## [16] "ENSMUSG00000067786" "ENSMUSG00000018102" "ENSMUSG00000034467"
## [19] "ENSMUSG00000095538" "ENSMUSG00000028565" "ENSMUSG00000017390"
## [22] "ENSMUSG00000053931" "ENSMUSG00000032231" "ENSMUSG00000026547"
## [25] "ENSMUSG00000028517" "ENSMUSG00000036570" "ENSMUSG00000028195"
## [28] "ENSMUSG00000021250" "ENSMUSG00000044080" "ENSMUSG00000062078"
## [1] ""
## [1] ""
## [1] "PC2"
##  [1] "ENSMUSG00000079018" "ENSMUSG00000041378" "ENSMUSG00000040584"
##  [4] "ENSMUSG00000020154" "ENSMUSG00000056492" "ENSMUSG00000033960"
##  [7] "ENSMUSG00000075602" "ENSMUSG00000030237" "ENSMUSG00000001946"
## [10] "ENSMUSG00000042745" "ENSMUSG00000030235" "ENSMUSG00000039167"
## [13] "ENSMUSG00000029648" "ENSMUSG00000020717" "ENSMUSG00000054404"
## [16] "ENSMUSG00000061353" "ENSMUSG00000034738" "ENSMUSG00000006386"
## [19] "ENSMUSG00000022584" "ENSMUSG00000031871" "ENSMUSG00000029802"
## [22] "ENSMUSG00000040732" "ENSMUSG00000114487" "ENSMUSG00000027435"
## [25] "ENSMUSG00000020077" "ENSMUSG00000062960" "ENSMUSG00000032035"
## [28] "ENSMUSG00000054690" "ENSMUSG00000031239" "ENSMUSG00000042116"
## [1] ""
##  [1] "ENSMUSG00000018451" "ENSMUSG00000055254" "ENSMUSG00000067786"
##  [4] "ENSMUSG00000000567" "ENSMUSG00000035805" "ENSMUSG00000026701"
##  [7] "ENSMUSG00000001025" "ENSMUSG00000026385" "ENSMUSG00000031760"
## [10] "ENSMUSG00000017390" "ENSMUSG00000031762" "ENSMUSG00000005360"
## [13] "ENSMUSG00000025666" "ENSMUSG00000034467" "ENSMUSG00000021379"
## [16] "ENSMUSG00000058135" "ENSMUSG00000031765" "ENSMUSG00000050953"
## [19] "ENSMUSG00000095538" "ENSMUSG00000004558" "ENSMUSG00000052727"
## [22] "ENSMUSG00000026223" "ENSMUSG00000036570" "ENSMUSG00000030428"
## [25] "ENSMUSG00000031342" "ENSMUSG00000024411" "ENSMUSG00000026649"
## [28] "ENSMUSG00000037852" "ENSMUSG00000026546" "ENSMUSG00000005089"
## [1] ""
## [1] ""
## [1] "PC3"
##  [1] "ENSMUSG00000021268" "ENSMUSG00000050711" "ENSMUSG00000040785"
##  [4] "ENSMUSG00000043384" "ENSMUSG00000033061" "ENSMUSG00000018451"
##  [7] "ENSMUSG00000019986" "ENSMUSG00000026223" "ENSMUSG00000026576"
## [10] "ENSMUSG00000044349" "ENSMUSG00000027350" "ENSMUSG00000072964"
## [13] "ENSMUSG00000006373" "ENSMUSG00000036438" "ENSMUSG00000027800"
## [16] "ENSMUSG00000024261" "ENSMUSG00000075602" "ENSMUSG00000031428"
## [19] "ENSMUSG00000002265" "ENSMUSG00000022661" "ENSMUSG00000023175"
## [22] "ENSMUSG00000038370" "ENSMUSG00000042745" "ENSMUSG00000070802"
## [25] "ENSMUSG00000042750" "ENSMUSG00000056492" "ENSMUSG00000032181"
## [28] "ENSMUSG00000027712" "ENSMUSG00000024268" "ENSMUSG00000041378"
## [1] ""
##  [1] "ENSMUSG00000037625" "ENSMUSG00000036634" "ENSMUSG00000031425"
##  [4] "ENSMUSG00000032517" "ENSMUSG00000032854" "ENSMUSG00000076439"
##  [7] "ENSMUSG00000041607" "ENSMUSG00000006782" "ENSMUSG00000026830"
## [10] "ENSMUSG00000027199" "ENSMUSG00000031775" "ENSMUSG00000115529"
## [13] "ENSMUSG00000032060" "ENSMUSG00000046160" "ENSMUSG00000020486"
## [16] "ENSMUSG00000056966" "ENSMUSG00000013523" "ENSMUSG00000038173"
## [19] "ENSMUSG00000022425" "ENSMUSG00000027562" "ENSMUSG00000015090"
## [22] "ENSMUSG00000090639" "ENSMUSG00000050121" "ENSMUSG00000032556"
## [25] "ENSMUSG00000027858" "ENSMUSG00000015149" "ENSMUSG00000070354"
## [28] "ENSMUSG00000027375" "ENSMUSG00000047976" "ENSMUSG00000040759"
## [1] ""
## [1] ""
## [1] "PC4"
##  [1] "ENSMUSG00000020423" "ENSMUSG00000003545" "ENSMUSG00000022528"
##  [4] "ENSMUSG00000022132" "ENSMUSG00000026185" "ENSMUSG00000007682"
##  [7] "ENSMUSG00000052837" "ENSMUSG00000028195" "ENSMUSG00000038418"
## [10] "ENSMUSG00000021250" "ENSMUSG00000033737" "ENSMUSG00000030629"
## [13] "ENSMUSG00000028298" "ENSMUSG00000058897" "ENSMUSG00000052387"
## [16] "ENSMUSG00000029838" "ENSMUSG00000047786" "ENSMUSG00000019997"
## [19] "ENSMUSG00000032532" "ENSMUSG00000045005" "ENSMUSG00000044786"
## [22] "ENSMUSG00000032081" "ENSMUSG00000063564" "ENSMUSG00000034640"
## [25] "ENSMUSG00000003949" "ENSMUSG00000019960" "ENSMUSG00000034810"
## [28] "ENSMUSG00000045394" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [1] ""
##  [1] "ENSMUSG00000043164" "ENSMUSG00000070306" "ENSMUSG00000094800"
##  [4] "ENSMUSG00000110332" "ENSMUSG00000020473" "ENSMUSG00000038370"
##  [7] "ENSMUSG00000072674" "ENSMUSG00000032595" "ENSMUSG00000041323"
## [10] "ENSMUSG00000027800" "ENSMUSG00000045655" "ENSMUSG00000044772"
## [13] "ENSMUSG00000033208" "ENSMUSG00000047394" "ENSMUSG00000108841"
## [16] "ENSMUSG00000026683" "ENSMUSG00000047139" "ENSMUSG00000022037"
## [19] "ENSMUSG00000021879" "ENSMUSG00000027744" "ENSMUSG00000029182"
## [22] "ENSMUSG00000051606" "ENSMUSG00000021270" "ENSMUSG00000021950"
## [25] "ENSMUSG00000047361" "ENSMUSG00000044475" "ENSMUSG00000029309"
## [28] "ENSMUSG00000026301" "ENSMUSG00000056174" "ENSMUSG00000021087"
## [1] ""
## [1] ""
## [1] "PC5"
##  [1] "ENSMUSG00000018451" "ENSMUSG00000029838" "ENSMUSG00000047786"
##  [4] "ENSMUSG00000055254" "ENSMUSG00000063564" "ENSMUSG00000007682"
##  [7] "ENSMUSG00000045005" "ENSMUSG00000017390" "ENSMUSG00000030428"
## [10] "ENSMUSG00000056380" "ENSMUSG00000015222" "ENSMUSG00000058897"
## [13] "ENSMUSG00000035686" "ENSMUSG00000027004" "ENSMUSG00000064339"
## [16] "ENSMUSG00000031342" "ENSMUSG00000033737" "ENSMUSG00000038156"
## [19] "ENSMUSG00000005360" "ENSMUSG00000052727" "ENSMUSG00000031626"
## [22] "ENSMUSG00000030905" "ENSMUSG00000058254" "ENSMUSG00000021087"
## [25] "ENSMUSG00000021732" "ENSMUSG00000055430" "ENSMUSG00000026701"
## [28] "ENSMUSG00000005089" "ENSMUSG00000040856" "ENSMUSG00000093460"
## [1] ""
##  [1] "ENSMUSG00000056973" "ENSMUSG00000028298" "ENSMUSG00000032532"
##  [4] "ENSMUSG00000032081" "ENSMUSG00000045394" "ENSMUSG00000015053"
##  [7] "ENSMUSG00000027857" "ENSMUSG00000023043" "ENSMUSG00000066720"
## [10] "ENSMUSG00000039994" "ENSMUSG00000021194" "ENSMUSG00000028023"
## [13] "ENSMUSG00000051367" "ENSMUSG00000049382" "ENSMUSG00000021508"
## [16] "ENSMUSG00000021506" "ENSMUSG00000018569" "ENSMUSG00000052974"
## [19] "ENSMUSG00000031380" "ENSMUSG00000054889" "ENSMUSG00000027350"
## [22] "ENSMUSG00000026185" "ENSMUSG00000000037" "ENSMUSG00000036192"
## [25] "ENSMUSG00000040569" "ENSMUSG00000037664" "ENSMUSG00000039943"
## [28] "ENSMUSG00000068154" "ENSMUSG00000046743" "ENSMUSG00000017723"
## [1] ""
## [1] ""

Determine statistically significant principal components

# Run the JackStraw statistical test (100 replicates) to find out which
# principal components are statistically significant.
seurat_campbell_refed <- JackStraw(
  object = seurat_campbell_refed,
  num.replicate = 100,
  display.progress = TRUE
)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
## Time Elapsed:  1.65351190964381 mins
# Maximum number of PCs allowed = 20.


# Draw the JackStraw plots for principal components 1 to 20.
JackStrawPlot(
  object = seurat_campbell_refed,
  PCs = 1:20
)
## Warning: Removed 20801 rows containing missing values (geom_point).

## An object of class seurat in project CAMPBELL_REFED 
##  19267 genes across 2174 samples.
# An elbow plot is a less computationally intensive heuristic for finding
# the statistically significant principal components.
PCElbowPlot(
  object = seurat_campbell_refed
)

Cell clustering

# Cluster the cells on principal components 1 to 13 at resolution 0.6.
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

# Report the parameters recorded for the clustering run.
PrintFindClustersParams(
  object = seurat_campbell_refed
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Compute the t-SNE embedding from principal components 1 to 13.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_refed)
# Plot the t-SNE with labels switched on and the legend switched off.
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)

# 1451 variable genes, 13 PC, resolution 0.6 = 9 clusters

# Tabulate the current cluster identities.
table(seurat_campbell_refed@ident)
## 
##   0   1   2   3   4   5   6   7   8 
## 732 385 334 304 112 100  96  75  36
#######
# Re-cluster the cells on principal components 1 to 13 at resolution 1.0.
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

# Ask for the parameters of the resolution that was just run. Called without
# `resolution`, the report previously showed the resolution 0.6 run instead of
# this one, so it did not confirm the settings used here.
PrintFindClustersParams(
  object = seurat_campbell_refed,
  resolution = 1.0
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Compute the t-SNE embedding from principal components 1 to 13.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_refed)
# Plot the t-SNE with labels switched on and the legend switched off.
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)

# 1451 variable genes, 13 PC, resolution 1.0 = 11 clusters

# Tabulate the current cluster identities.
table(seurat_campbell_refed@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10 
## 376 374 370 324 233 111 100  95  79  76  36
#######
# Re-cluster the cells on principal components 1 to 13 at resolution 1.5.
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

# Ask for the parameters of the resolution that was just run. Called without
# `resolution`, the report previously showed the resolution 0.6 run instead of
# this one, so it did not confirm the settings used here.
PrintFindClustersParams(
  object = seurat_campbell_refed,
  resolution = 1.5
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Compute the t-SNE embedding from principal components 1 to 13.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_refed)
# Plot the t-SNE with labels switched on and the legend switched off.
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)

# 1451 variable genes, 13 PC, resolution 1.5 = 14 clusters

# Tabulate the current cluster identities.
table(seurat_campbell_refed@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13 
## 351 335 263 224 219 206 111 100  80  77  77  76  36  19
#######
# Re-cluster the cells on principal components 1 to 13 at resolution 2.0.
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)

# Ask for the parameters of the resolution that was just run. Called without
# `resolution`, the report previously showed the resolution 0.6 run instead of
# this one, so it did not confirm the settings used here.
PrintFindClustersParams(
  object = seurat_campbell_refed,
  resolution = 2.0
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function    Algorithm         n.start         n.iter
##      1                   1                 100             10
## -----------------------------------------------------------------------------
## Reduction used          k.param          prune.SNN
##      pca                 30                0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
# Compute the t-SNE embedding from principal components 1 to 13.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)

# TSNEPlot(object = seurat_campbell_refed)
# Plot the t-SNE with labels switched on and the legend switched off.
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)

# 1451 variable genes, 13 PC, resolution 2.0 = 15 clusters

# Tabulate the current cluster identities.
table(seurat_campbell_refed@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14 
## 340 255 205 204 203 183 181 111 100  81  77  76  73  49  36
# Final clustering: 1451 variable genes, 13 PC, resolution = 1.0.
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)


# t-SNE for the final parameters.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)

Use 1451 variable genes and 13 principal components with a resolution of 1.0. This gives a total of 11 clusters.

Finding differentially expressed genes between cell clusters (cluster biomarkers)

# Markers for every cluster versus all remaining cells; both positive and
# negative genes are reported (only.pos = FALSE).
seurat_campbell_refed_biomarkers <- FindAllMarkers(
  object = seurat_campbell_refed,
  only.pos = FALSE,
  min.pct = 0.2
)


# Keep the 10 markers with the largest avg_logFC within each cluster.
top10_seurat_campbell_markers <- seurat_campbell_refed_biomarkers %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
top10_seurat_campbell_markers
## # A tibble: 110 x 7
## # Groups:   cluster [11]
##        p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene              
##        <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>             
##  1 1.13e-283      2.92 0.995 0.152 2.17e-279 0       ENSMUSG00000005705
##  2 5.73e-242      3.36 0.992 0.282 1.10e-237 0       ENSMUSG00000029819
##  3 3.71e-211      1.88 0.657 0.042 7.14e-207 0       ENSMUSG00000026834
##  4 2.23e-187      2.04 0.71  0.084 4.29e-183 0       ENSMUSG00000021091
##  5 1.06e-132      1.44 0.819 0.231 2.04e-128 0       ENSMUSG00000051159
##  6 3.36e-108      1.19 0.455 0.049 6.47e-104 0       ENSMUSG00000021685
##  7 1.88e-100      1.17 0.803 0.284 3.62e- 96 0       ENSMUSG00000027523
##  8 6.34e- 92      1.10 0.434 0.058 1.22e- 87 0       ENSMUSG00000055737
##  9 3.46e- 86      1.17 0.622 0.169 6.67e- 82 0       ENSMUSG00000054667
## 10 4.68e- 81      1.23 0.726 0.272 9.02e- 77 0       ENSMUSG00000026360
## # ... with 100 more rows
# Write the full marker table and the per-cluster top-10 table to CSV.
write.csv(
  as.data.frame(seurat_campbell_refed_biomarkers),
  file = "seurat_campbell_refed_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_refed_biomarkers.csv",
  quote = FALSE
)




# Repeat the marker search with the ROC test. This can take a long time.
seurat_campbell_refed_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_refed,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)

# Keep the 10 markers with the largest avg_logFC within each cluster.
top10_seurat_campbell_markers_ROC <- seurat_campbell_refed_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 95 x 9
## # Groups:   cluster [11]
##    myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene      
##    <dbl>    <dbl> <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>     
##  1 0.983     3.36 0.966      3.36 0.992 0.282        NA 0       ENSMUSG00…
##  2 0.977     2.92 0.954      2.92 0.995 0.152        NA 0       ENSMUSG00…
##  3 0.836     1.44 0.672      1.44 0.819 0.231        NA 0       ENSMUSG00…
##  4 0.829     2.04 0.658      2.04 0.71  0.084        NA 0       ENSMUSG00…
##  5 0.814     1.88 0.628      1.88 0.657 0.042        NA 0       ENSMUSG00…
##  6 0.803     1.17 0.606      1.17 0.803 0.284        NA 0       ENSMUSG00…
##  7 0.766     1.23 0.532      1.23 0.726 0.272        NA 0       ENSMUSG00…
##  8 0.744     1.17 0.488      1.17 0.622 0.169        NA 0       ENSMUSG00…
##  9 0.722     1.10 0.444      1.10 0.62  0.231        NA 0       ENSMUSG00…
## 10 0.704     1.19 0.408      1.19 0.455 0.049        NA 0       ENSMUSG00…
## # ... with 85 more rows
# Write the full ROC marker table and the per-cluster top-10 table to CSV.
write.csv(
  as.data.frame(seurat_campbell_refed_biomarkers_ROC),
  file = "seurat_campbell_refed_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_refed_biomarkers_ROC.csv",
  quote = FALSE
)


# Heatmap of the per-cluster top-10 differentially expressed genes.
DoHeatmap(
  object = seurat_campbell_refed,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)

Save the Seurat object of the refed mice.

# Save the Seurat object to an .rds file.
saveRDS(
  object = seurat_campbell_refed,
  file = "./seurat_campbell_refed_final.rds"
)